first commit

2026-03-11 16:13:59 +08:00 · 2026-03-11 16:13:59 +08:00 · 048860986f
commit 048860986f
220 changed files with 33672 additions and 0 deletions
--- a/.gitignore
+++ b/.gitignore
@ -0,0 +1,73 @@
+# Python
+__pycache__/
+*.py[cod]
+*.pyo
+*.pyd
+.Python
+*.egg-info/
+dist/
+build/
+.eggs/
+
+# Virtual environments
+venv/
+env/
+.env/
+
+# Model weights and binary files
+*.pt
+*.pth
+*.onnx
+*.bie
+*.nef
+*.engine
+*.tflite
+*.pb
+
+# Training outputs (large; kept out of git)
+runs/
+work_dirs/
+
+# Dataset images (kept out of git; use DVC or external storage instead)
+data4/
+data50/
+test14data/
+teachabledata/
+numberocr/
+
+# ONNX output directory
+onnx/
+
+# NumPy temporary files
+npy/
+
+# Temporary holding area
+暫放區/
+
+# Weights directory (everything except the download script); note the global *.pt / *.pth patterns above already match these
+weights/*.pt
+weights/*.pth
+
+# Claude Code settings
+.claude/
+
+# Jupyter Notebook output
+.ipynb_checkpoints/
+
+# OS files
+.DS_Store
+Thumbs.db
+desktop.ini
+
+# IDE
+.vscode/
+.idea/
+*.swp
+*.swo
+
+# Logs
+*.log
+wandb/
+
+# Empty placeholder file
+python
--- a/52
+++ b/52
@ -0,0 +1,52 @@
+# Start FROM Nvidia PyTorch image https://ngc.nvidia.com/catalog/containers/nvidia:pytorch
+FROM nvcr.io/nvidia/pytorch:20.10-py3
+
+# Install dependencies in a single layer; --no-cache-dir keeps pip's download
+# cache out of the image. NOTE(review): gsutil is unpinned - consider pinning a version.
+# COPY requirements.txt .
+# RUN pip install --no-cache-dir -r requirements.txt
+RUN pip install --no-cache-dir --upgrade pip \
+    && pip install --no-cache-dir gsutil
+
+# Set the working directory (WORKDIR creates it if missing, so no separate mkdir layer is needed)
+WORKDIR /usr/src/app
+
+# Copy contents
+COPY . /usr/src/app
+
+# Copy weights
+#RUN python3 -c "from models import *; \
+#attempt_download('weights/yolov5s.pt'); \
+#attempt_download('weights/yolov5m.pt'); \
+#attempt_download('weights/yolov5l.pt')"
+
+
+# ---------------------------------------------------  Extras Below  ---------------------------------------------------
+
+# Build and Push
+# t=ultralytics/yolov5:latest && sudo docker build -t $t . && sudo docker push $t
+# for v in {300..303}; do t=ultralytics/coco:v$v && sudo docker build -t $t . && sudo docker push $t; done
+
+# Pull and Run
+# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host $t
+
+# Pull and Run with local directory access
+# t=ultralytics/yolov5:latest && sudo docker pull $t && sudo docker run -it --ipc=host --gpus all -v "$(pwd)"/coco:/usr/src/coco $t
+
+# Kill all
+# sudo docker kill $(sudo docker ps -q)
+
+# Kill all image-based
+# sudo docker kill $(sudo docker ps -a -q --filter ancestor=ultralytics/yolov5:latest)
+
+# Bash into running container
+# sudo docker container exec -it ba65811811ab bash
+
+# Bash into stopped container
+# sudo docker commit 092b16b25c5b usr/resume && sudo docker run -it --gpus all --ipc=host -v "$(pwd)"/coco:/usr/src/coco --entrypoint=sh usr/resume
+
+# Send weights to GCP
+# python -c "from utils.general import *; strip_optimizer('runs/train/exp0_*/weights/best.pt', 'tmp.pt')" && gsutil cp tmp.pt gs://*.pt
+
+# Clean up
+# docker system prune -a --volumes
--- a/674
+++ b/674
@ -0,0 +1,674 @@
+GNU GENERAL PUBLIC LICENSE
+                       Version 3, 29 June 2007
+
+ Copyright (C) 2007 Free Software Foundation, Inc. <http://fsf.org/>
+ Everyone is permitted to copy and distribute verbatim copies
+ of this license document, but changing it is not allowed.
+
+                            Preamble
+
+  The GNU General Public License is a free, copyleft license for
+software and other kinds of works.
+
+  The licenses for most software and other practical works are designed
+to take away your freedom to share and change the works.  By contrast,
+the GNU General Public License is intended to guarantee your freedom to
+share and change all versions of a program--to make sure it remains free
+software for all its users.  We, the Free Software Foundation, use the
+GNU General Public License for most of our software; it applies also to
+any other work released this way by its authors.  You can apply it to
+your programs, too.
+
+  When we speak of free software, we are referring to freedom, not
+price.  Our General Public Licenses are designed to make sure that you
+have the freedom to distribute copies of free software (and charge for
+them if you wish), that you receive source code or can get it if you
+want it, that you can change the software or use pieces of it in new
+free programs, and that you know you can do these things.
+
+  To protect your rights, we need to prevent others from denying you
+these rights or asking you to surrender the rights.  Therefore, you have
+certain responsibilities if you distribute copies of the software, or if
+you modify it: responsibilities to respect the freedom of others.
+
+  For example, if you distribute copies of such a program, whether
+gratis or for a fee, you must pass on to the recipients the same
+freedoms that you received.  You must make sure that they, too, receive
+or can get the source code.  And you must show them these terms so they
+know their rights.
+
+  Developers that use the GNU GPL protect your rights with two steps:
+(1) assert copyright on the software, and (2) offer you this License
+giving you legal permission to copy, distribute and/or modify it.
+
+  For the developers' and authors' protection, the GPL clearly explains
+that there is no warranty for this free software.  For both users' and
+authors' sake, the GPL requires that modified versions be marked as
+changed, so that their problems will not be attributed erroneously to
+authors of previous versions.
+
+  Some devices are designed to deny users access to install or run
+modified versions of the software inside them, although the manufacturer
+can do so.  This is fundamentally incompatible with the aim of
+protecting users' freedom to change the software.  The systematic
+pattern of such abuse occurs in the area of products for individuals to
+use, which is precisely where it is most unacceptable.  Therefore, we
+have designed this version of the GPL to prohibit the practice for those
+products.  If such problems arise substantially in other domains, we
+stand ready to extend this provision to those domains in future versions
+of the GPL, as needed to protect the freedom of users.
+
+  Finally, every program is threatened constantly by software patents.
+States should not allow patents to restrict development and use of
+software on general-purpose computers, but in those that do, we wish to
+avoid the special danger that patents applied to a free program could
+make it effectively proprietary.  To prevent this, the GPL assures that
+patents cannot be used to render the program non-free.
+
+  The precise terms and conditions for copying, distribution and
+modification follow.
+
+                       TERMS AND CONDITIONS
+
+  0. Definitions.
+
+  "This License" refers to version 3 of the GNU General Public License.
+
+  "Copyright" also means copyright-like laws that apply to other kinds of
+works, such as semiconductor masks.
+
+  "The Program" refers to any copyrightable work licensed under this
+License.  Each licensee is addressed as "you".  "Licensees" and
+"recipients" may be individuals or organizations.
+
+  To "modify" a work means to copy from or adapt all or part of the work
+in a fashion requiring copyright permission, other than the making of an
+exact copy.  The resulting work is called a "modified version" of the
+earlier work or a work "based on" the earlier work.
+
+  A "covered work" means either the unmodified Program or a work based
+on the Program.
+
+  To "propagate" a work means to do anything with it that, without
+permission, would make you directly or secondarily liable for
+infringement under applicable copyright law, except executing it on a
+computer or modifying a private copy.  Propagation includes copying,
+distribution (with or without modification), making available to the
+public, and in some countries other activities as well.
+
+  To "convey" a work means any kind of propagation that enables other
+parties to make or receive copies.  Mere interaction with a user through
+a computer network, with no transfer of a copy, is not conveying.
+
+  An interactive user interface displays "Appropriate Legal Notices"
+to the extent that it includes a convenient and prominently visible
+feature that (1) displays an appropriate copyright notice, and (2)
+tells the user that there is no warranty for the work (except to the
+extent that warranties are provided), that licensees may convey the
+work under this License, and how to view a copy of this License.  If
+the interface presents a list of user commands or options, such as a
+menu, a prominent item in the list meets this criterion.
+
+  1. Source Code.
+
+  The "source code" for a work means the preferred form of the work
+for making modifications to it.  "Object code" means any non-source
+form of a work.
+
+  A "Standard Interface" means an interface that either is an official
+standard defined by a recognized standards body, or, in the case of
+interfaces specified for a particular programming language, one that
+is widely used among developers working in that language.
+
+  The "System Libraries" of an executable work include anything, other
+than the work as a whole, that (a) is included in the normal form of
+packaging a Major Component, but which is not part of that Major
+Component, and (b) serves only to enable use of the work with that
+Major Component, or to implement a Standard Interface for which an
+implementation is available to the public in source code form.  A
+"Major Component", in this context, means a major essential component
+(kernel, window system, and so on) of the specific operating system
+(if any) on which the executable work runs, or a compiler used to
+produce the work, or an object code interpreter used to run it.
+
+  The "Corresponding Source" for a work in object code form means all
+the source code needed to generate, install, and (for an executable
+work) run the object code and to modify the work, including scripts to
+control those activities.  However, it does not include the work's
+System Libraries, or general-purpose tools or generally available free
+programs which are used unmodified in performing those activities but
+which are not part of the work.  For example, Corresponding Source
+includes interface definition files associated with source files for
+the work, and the source code for shared libraries and dynamically
+linked subprograms that the work is specifically designed to require,
+such as by intimate data communication or control flow between those
+subprograms and other parts of the work.
+
+  The Corresponding Source need not include anything that users
+can regenerate automatically from other parts of the Corresponding
+Source.
+
+  The Corresponding Source for a work in source code form is that
+same work.
+
+  2. Basic Permissions.
+
+  All rights granted under this License are granted for the term of
+copyright on the Program, and are irrevocable provided the stated
+conditions are met.  This License explicitly affirms your unlimited
+permission to run the unmodified Program.  The output from running a
+covered work is covered by this License only if the output, given its
+content, constitutes a covered work.  This License acknowledges your
+rights of fair use or other equivalent, as provided by copyright law.
+
+  You may make, run and propagate covered works that you do not
+convey, without conditions so long as your license otherwise remains
+in force.  You may convey covered works to others for the sole purpose
+of having them make modifications exclusively for you, or provide you
+with facilities for running those works, provided that you comply with
+the terms of this License in conveying all material for which you do
+not control copyright.  Those thus making or running the covered works
+for you must do so exclusively on your behalf, under your direction
+and control, on terms that prohibit them from making any copies of
+your copyrighted material outside their relationship with you.
+
+  Conveying under any other circumstances is permitted solely under
+the conditions stated below.  Sublicensing is not allowed; section 10
+makes it unnecessary.
+
+  3. Protecting Users' Legal Rights From Anti-Circumvention Law.
+
+  No covered work shall be deemed part of an effective technological
+measure under any applicable law fulfilling obligations under article
+11 of the WIPO copyright treaty adopted on 20 December 1996, or
+similar laws prohibiting or restricting circumvention of such
+measures.
+
+  When you convey a covered work, you waive any legal power to forbid
+circumvention of technological measures to the extent such circumvention
+is effected by exercising rights under this License with respect to
+the covered work, and you disclaim any intention to limit operation or
+modification of the work as a means of enforcing, against the work's
+users, your or third parties' legal rights to forbid circumvention of
+technological measures.
+
+  4. Conveying Verbatim Copies.
+
+  You may convey verbatim copies of the Program's source code as you
+receive it, in any medium, provided that you conspicuously and
+appropriately publish on each copy an appropriate copyright notice;
+keep intact all notices stating that this License and any
+non-permissive terms added in accord with section 7 apply to the code;
+keep intact all notices of the absence of any warranty; and give all
+recipients a copy of this License along with the Program.
+
+  You may charge any price or no price for each copy that you convey,
+and you may offer support or warranty protection for a fee.
+
+  5. Conveying Modified Source Versions.
+
+  You may convey a work based on the Program, or the modifications to
+produce it from the Program, in the form of source code under the
+terms of section 4, provided that you also meet all of these conditions:
+
+    a) The work must carry prominent notices stating that you modified
+    it, and giving a relevant date.
+
+    b) The work must carry prominent notices stating that it is
+    released under this License and any conditions added under section
+    7.  This requirement modifies the requirement in section 4 to
+    "keep intact all notices".
+
+    c) You must license the entire work, as a whole, under this
+    License to anyone who comes into possession of a copy.  This
+    License will therefore apply, along with any applicable section 7
+    additional terms, to the whole of the work, and all its parts,
+    regardless of how they are packaged.  This License gives no
+    permission to license the work in any other way, but it does not
+    invalidate such permission if you have separately received it.
+
+    d) If the work has interactive user interfaces, each must display
+    Appropriate Legal Notices; however, if the Program has interactive
+    interfaces that do not display Appropriate Legal Notices, your
+    work need not make them do so.
+
+  A compilation of a covered work with other separate and independent
+works, which are not by their nature extensions of the covered work,
+and which are not combined with it such as to form a larger program,
+in or on a volume of a storage or distribution medium, is called an
+"aggregate" if the compilation and its resulting copyright are not
+used to limit the access or legal rights of the compilation's users
+beyond what the individual works permit.  Inclusion of a covered work
+in an aggregate does not cause this License to apply to the other
+parts of the aggregate.
+
+  6. Conveying Non-Source Forms.
+
+  You may convey a covered work in object code form under the terms
+of sections 4 and 5, provided that you also convey the
+machine-readable Corresponding Source under the terms of this License,
+in one of these ways:
+
+    a) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by the
+    Corresponding Source fixed on a durable physical medium
+    customarily used for software interchange.
+
+    b) Convey the object code in, or embodied in, a physical product
+    (including a physical distribution medium), accompanied by a
+    written offer, valid for at least three years and valid for as
+    long as you offer spare parts or customer support for that product
+    model, to give anyone who possesses the object code either (1) a
+    copy of the Corresponding Source for all the software in the
+    product that is covered by this License, on a durable physical
+    medium customarily used for software interchange, for a price no
+    more than your reasonable cost of physically performing this
+    conveying of source, or (2) access to copy the
+    Corresponding Source from a network server at no charge.
+
+    c) Convey individual copies of the object code with a copy of the
+    written offer to provide the Corresponding Source.  This
+    alternative is allowed only occasionally and noncommercially, and
+    only if you received the object code with such an offer, in accord
+    with subsection 6b.
+
+    d) Convey the object code by offering access from a designated
+    place (gratis or for a charge), and offer equivalent access to the
+    Corresponding Source in the same way through the same place at no
+    further charge.  You need not require recipients to copy the
+    Corresponding Source along with the object code.  If the place to
+    copy the object code is a network server, the Corresponding Source
+    may be on a different server (operated by you or a third party)
+    that supports equivalent copying facilities, provided you maintain
+    clear directions next to the object code saying where to find the
+    Corresponding Source.  Regardless of what server hosts the
+    Corresponding Source, you remain obligated to ensure that it is
+    available for as long as needed to satisfy these requirements.
+
+    e) Convey the object code using peer-to-peer transmission, provided
+    you inform other peers where the object code and Corresponding
+    Source of the work are being offered to the general public at no
+    charge under subsection 6d.
+
+  A separable portion of the object code, whose source code is excluded
+from the Corresponding Source as a System Library, need not be
+included in conveying the object code work.
+
+  A "User Product" is either (1) a "consumer product", which means any
+tangible personal property which is normally used for personal, family,
+or household purposes, or (2) anything designed or sold for incorporation
+into a dwelling.  In determining whether a product is a consumer product,
+doubtful cases shall be resolved in favor of coverage.  For a particular
+product received by a particular user, "normally used" refers to a
+typical or common use of that class of product, regardless of the status
+of the particular user or of the way in which the particular user
+actually uses, or expects or is expected to use, the product.  A product
+is a consumer product regardless of whether the product has substantial
+commercial, industrial or non-consumer uses, unless such uses represent
+the only significant mode of use of the product.
+
+  "Installation Information" for a User Product means any methods,
+procedures, authorization keys, or other information required to install
+and execute modified versions of a covered work in that User Product from
+a modified version of its Corresponding Source.  The information must
+suffice to ensure that the continued functioning of the modified object
+code is in no case prevented or interfered with solely because
+modification has been made.
+
+  If you convey an object code work under this section in, or with, or
+specifically for use in, a User Product, and the conveying occurs as
+part of a transaction in which the right of possession and use of the
+User Product is transferred to the recipient in perpetuity or for a
+fixed term (regardless of how the transaction is characterized), the
+Corresponding Source conveyed under this section must be accompanied
+by the Installation Information.  But this requirement does not apply
+if neither you nor any third party retains the ability to install
+modified object code on the User Product (for example, the work has
+been installed in ROM).
+
+  The requirement to provide Installation Information does not include a
+requirement to continue to provide support service, warranty, or updates
+for a work that has been modified or installed by the recipient, or for
+the User Product in which it has been modified or installed.  Access to a
+network may be denied when the modification itself materially and
+adversely affects the operation of the network or violates the rules and
+protocols for communication across the network.
+
+  Corresponding Source conveyed, and Installation Information provided,
+in accord with this section must be in a format that is publicly
+documented (and with an implementation available to the public in
+source code form), and must require no special password or key for
+unpacking, reading or copying.
+
+  7. Additional Terms.
+
+  "Additional permissions" are terms that supplement the terms of this
+License by making exceptions from one or more of its conditions.
+Additional permissions that are applicable to the entire Program shall
+be treated as though they were included in this License, to the extent
+that they are valid under applicable law.  If additional permissions
+apply only to part of the Program, that part may be used separately
+under those permissions, but the entire Program remains governed by
+this License without regard to the additional permissions.
+
+  When you convey a copy of a covered work, you may at your option
+remove any additional permissions from that copy, or from any part of
+it.  (Additional permissions may be written to require their own
+removal in certain cases when you modify the work.)  You may place
+additional permissions on material, added by you to a covered work,
+for which you have or can give appropriate copyright permission.
+
+  Notwithstanding any other provision of this License, for material you
+add to a covered work, you may (if authorized by the copyright holders of
+that material) supplement the terms of this License with terms:
+
+    a) Disclaiming warranty or limiting liability differently from the
+    terms of sections 15 and 16 of this License; or
+
+    b) Requiring preservation of specified reasonable legal notices or
+    author attributions in that material or in the Appropriate Legal
+    Notices displayed by works containing it; or
+
+    c) Prohibiting misrepresentation of the origin of that material, or
+    requiring that modified versions of such material be marked in
+    reasonable ways as different from the original version; or
+
+    d) Limiting the use for publicity purposes of names of licensors or
+    authors of the material; or
+
+    e) Declining to grant rights under trademark law for use of some
+    trade names, trademarks, or service marks; or
+
+    f) Requiring indemnification of licensors and authors of that
+    material by anyone who conveys the material (or modified versions of
+    it) with contractual assumptions of liability to the recipient, for
+    any liability that these contractual assumptions directly impose on
+    those licensors and authors.
+
+  All other non-permissive additional terms are considered "further
+restrictions" within the meaning of section 10.  If the Program as you
+received it, or any part of it, contains a notice stating that it is
+governed by this License along with a term that is a further
+restriction, you may remove that term.  If a license document contains
+a further restriction but permits relicensing or conveying under this
+License, you may add to a covered work material governed by the terms
+of that license document, provided that the further restriction does
+not survive such relicensing or conveying.
+
+  If you add terms to a covered work in accord with this section, you
+must place, in the relevant source files, a statement of the
+additional terms that apply to those files, or a notice indicating
+where to find the applicable terms.
+
+  Additional terms, permissive or non-permissive, may be stated in the
+form of a separately written license, or stated as exceptions;
+the above requirements apply either way.
+
+  8. Termination.
+
+  You may not propagate or modify a covered work except as expressly
+provided under this License.  Any attempt otherwise to propagate or
+modify it is void, and will automatically terminate your rights under
+this License (including any patent licenses granted under the third
+paragraph of section 11).
+
+  However, if you cease all violation of this License, then your
+license from a particular copyright holder is reinstated (a)
+provisionally, unless and until the copyright holder explicitly and
+finally terminates your license, and (b) permanently, if the copyright
+holder fails to notify you of the violation by some reasonable means
+prior to 60 days after the cessation.
+
+  Moreover, your license from a particular copyright holder is
+reinstated permanently if the copyright holder notifies you of the
+violation by some reasonable means, this is the first time you have
+received notice of violation of this License (for any work) from that
+copyright holder, and you cure the violation prior to 30 days after
+your receipt of the notice.
+
+  Termination of your rights under this section does not terminate the
+licenses of parties who have received copies or rights from you under
+this License.  If your rights have been terminated and not permanently
+reinstated, you do not qualify to receive new licenses for the same
+material under section 10.
+
+  9. Acceptance Not Required for Having Copies.
+
+  You are not required to accept this License in order to receive or
+run a copy of the Program.  Ancillary propagation of a covered work
+occurring solely as a consequence of using peer-to-peer transmission
+to receive a copy likewise does not require acceptance.  However,
+nothing other than this License grants you permission to propagate or
+modify any covered work.  These actions infringe copyright if you do
+not accept this License.  Therefore, by modifying or propagating a
+covered work, you indicate your acceptance of this License to do so.
+
+  10. Automatic Licensing of Downstream Recipients.
+
+  Each time you convey a covered work, the recipient automatically
+receives a license from the original licensors, to run, modify and
+propagate that work, subject to this License.  You are not responsible
+for enforcing compliance by third parties with this License.
+
+  An "entity transaction" is a transaction transferring control of an
+organization, or substantially all assets of one, or subdividing an
+organization, or merging organizations.  If propagation of a covered
+work results from an entity transaction, each party to that
+transaction who receives a copy of the work also receives whatever
+licenses to the work the party's predecessor in interest had or could
+give under the previous paragraph, plus a right to possession of the
+Corresponding Source of the work from the predecessor in interest, if
+the predecessor has it or can get it with reasonable efforts.
+
+  You may not impose any further restrictions on the exercise of the
+rights granted or affirmed under this License.  For example, you may
+not impose a license fee, royalty, or other charge for exercise of
+rights granted under this License, and you may not initiate litigation
+(including a cross-claim or counterclaim in a lawsuit) alleging that
+any patent claim is infringed by making, using, selling, offering for
+sale, or importing the Program or any portion of it.
+
+  11. Patents.
+
+  A "contributor" is a copyright holder who authorizes use under this
+License of the Program or a work on which the Program is based.  The
+work thus licensed is called the contributor's "contributor version".
+
+  A contributor's "essential patent claims" are all patent claims
+owned or controlled by the contributor, whether already acquired or
+hereafter acquired, that would be infringed by some manner, permitted
+by this License, of making, using, or selling its contributor version,
+but do not include claims that would be infringed only as a
+consequence of further modification of the contributor version.  For
+purposes of this definition, "control" includes the right to grant
+patent sublicenses in a manner consistent with the requirements of
+this License.
+
+  Each contributor grants you a non-exclusive, worldwide, royalty-free
+patent license under the contributor's essential patent claims, to
+make, use, sell, offer for sale, import and otherwise run, modify and
+propagate the contents of its contributor version.
+
+  In the following three paragraphs, a "patent license" is any express
+agreement or commitment, however denominated, not to enforce a patent
+(such as an express permission to practice a patent or covenant not to
+sue for patent infringement).  To "grant" such a patent license to a
+party means to make such an agreement or commitment not to enforce a
+patent against the party.
+
+  If you convey a covered work, knowingly relying on a patent license,
+and the Corresponding Source of the work is not available for anyone
+to copy, free of charge and under the terms of this License, through a
+publicly available network server or other readily accessible means,
+then you must either (1) cause the Corresponding Source to be so
+available, or (2) arrange to deprive yourself of the benefit of the
+patent license for this particular work, or (3) arrange, in a manner
+consistent with the requirements of this License, to extend the patent
+license to downstream recipients.  "Knowingly relying" means you have
+actual knowledge that, but for the patent license, your conveying the
+covered work in a country, or your recipient's use of the covered work
+in a country, would infringe one or more identifiable patents in that
+country that you have reason to believe are valid.
+
+  If, pursuant to or in connection with a single transaction or
+arrangement, you convey, or propagate by procuring conveyance of, a
+covered work, and grant a patent license to some of the parties
+receiving the covered work authorizing them to use, propagate, modify
+or convey a specific copy of the covered work, then the patent license
+you grant is automatically extended to all recipients of the covered
+work and works based on it.
+
+  A patent license is "discriminatory" if it does not include within
+the scope of its coverage, prohibits the exercise of, or is
+conditioned on the non-exercise of one or more of the rights that are
+specifically granted under this License.  You may not convey a covered
+work if you are a party to an arrangement with a third party that is
+in the business of distributing software, under which you make payment
+to the third party based on the extent of your activity of conveying
+the work, and under which the third party grants, to any of the
+parties who would receive the covered work from you, a discriminatory
+patent license (a) in connection with copies of the covered work
+conveyed by you (or copies made from those copies), or (b) primarily
+for and in connection with specific products or compilations that
+contain the covered work, unless you entered into that arrangement,
+or that patent license was granted, prior to 28 March 2007.
+
+  Nothing in this License shall be construed as excluding or limiting
+any implied license or other defenses to infringement that may
+otherwise be available to you under applicable patent law.
+
+  12. No Surrender of Others' Freedom.
+
+  If conditions are imposed on you (whether by court order, agreement or
+otherwise) that contradict the conditions of this License, they do not
+excuse you from the conditions of this License.  If you cannot convey a
+covered work so as to satisfy simultaneously your obligations under this
+License and any other pertinent obligations, then as a consequence you may
+not convey it at all.  For example, if you agree to terms that obligate you
+to collect a royalty for further conveying from those to whom you convey
+the Program, the only way you could satisfy both those terms and this
+License would be to refrain entirely from conveying the Program.
+
+  13. Use with the GNU Affero General Public License.
+
+  Notwithstanding any other provision of this License, you have
+permission to link or combine any covered work with a work licensed
+under version 3 of the GNU Affero General Public License into a single
+combined work, and to convey the resulting work.  The terms of this
+License will continue to apply to the part which is the covered work,
+but the special requirements of the GNU Affero General Public License,
+section 13, concerning interaction through a network will apply to the
+combination as such.
+
+  14. Revised Versions of this License.
+
+  The Free Software Foundation may publish revised and/or new versions of
+the GNU General Public License from time to time.  Such new versions will
+be similar in spirit to the present version, but may differ in detail to
+address new problems or concerns.
+
+  Each version is given a distinguishing version number.  If the
+Program specifies that a certain numbered version of the GNU General
+Public License "or any later version" applies to it, you have the
+option of following the terms and conditions either of that numbered
+version or of any later version published by the Free Software
+Foundation.  If the Program does not specify a version number of the
+GNU General Public License, you may choose any version ever published
+by the Free Software Foundation.
+
+  If the Program specifies that a proxy can decide which future
+versions of the GNU General Public License can be used, that proxy's
+public statement of acceptance of a version permanently authorizes you
+to choose that version for the Program.
+
+  Later license versions may give you additional or different
+permissions.  However, no additional obligations are imposed on any
+author or copyright holder as a result of your choosing to follow a
+later version.
+
+  15. Disclaimer of Warranty.
+
+  THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY
+APPLICABLE LAW.  EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT
+HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY
+OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO,
+THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+PURPOSE.  THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM
+IS WITH YOU.  SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF
+ALL NECESSARY SERVICING, REPAIR OR CORRECTION.
+
+  16. Limitation of Liability.
+
+  IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING
+WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MODIFIES AND/OR CONVEYS
+THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY
+GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE
+USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF
+DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD
+PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS),
+EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF
+SUCH DAMAGES.
+
+  17. Interpretation of Sections 15 and 16.
+
+  If the disclaimer of warranty and limitation of liability provided
+above cannot be given local legal effect according to their terms,
+reviewing courts shall apply local law that most closely approximates
+an absolute waiver of all civil liability in connection with the
+Program, unless a warranty or assumption of liability accompanies a
+copy of the Program in return for a fee.
+
+                     END OF TERMS AND CONDITIONS
+
+            How to Apply These Terms to Your New Programs
+
+  If you develop a new program, and you want it to be of the greatest
+possible use to the public, the best way to achieve this is to make it
+free software which everyone can redistribute and change under these terms.
+
+  To do so, attach the following notices to the program.  It is safest
+to attach them to the start of each source file to most effectively
+state the exclusion of warranty; and each file should have at least
+the "copyright" line and a pointer to where the full notice is found.
+
+    <one line to give the program's name and a brief idea of what it does.>
+    Copyright (C) <year>  <name of author>
+
+    This program is free software: you can redistribute it and/or modify
+    it under the terms of the GNU General Public License as published by
+    the Free Software Foundation, either version 3 of the License, or
+    (at your option) any later version.
+
+    This program is distributed in the hope that it will be useful,
+    but WITHOUT ANY WARRANTY; without even the implied warranty of
+    MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
+    GNU General Public License for more details.
+
+    You should have received a copy of the GNU General Public License
+    along with this program.  If not, see <http://www.gnu.org/licenses/>.
+
+Also add information on how to contact you by electronic and paper mail.
+
+  If the program does terminal interaction, make it output a short
+notice like this when it starts in an interactive mode:
+
+    <program>  Copyright (C) <year>  <name of author>
+    This program comes with ABSOLUTELY NO WARRANTY; for details type `show w'.
+    This is free software, and you are welcome to redistribute it
+    under certain conditions; type `show c' for details.
+
+The hypothetical commands `show w' and `show c' should show the appropriate
+parts of the General Public License.  Of course, your program's commands
+might be different; for a GUI interface, you would use an "about box".
+
+  You should also get your employer (if you work as a programmer) or school,
+if any, to sign a "copyright disclaimer" for the program, if necessary.
+For more information on this, and how to apply and follow the GNU GPL, see
+<http://www.gnu.org/licenses/>.
+
+  The GNU General Public License does not permit incorporating your program
+into proprietary programs.  If your program is a subroutine library, you
+may consider it more useful to permit linking proprietary applications with
+the library.  If this is what you want to do, use the GNU Lesser General
+Public License instead of this License.  But first, please read
+<http://www.gnu.org/philosophy/why-not-lgpl.html>.
--- a/README.md
+++ b/README.md
@ -0,0 +1,270 @@
+<h1 align="center">  Object Detection </h1>
+Object Detection task with YOLOv5 model.
+
+This document contains the explanations of arguments of each script.
+
+
+You can find the tutorial document for finetuning a pretrained model on COCO128 dataset under the `tutorial` folder, `tutorial/README.md`. 
+
+
+The ipython notebook tutorial is also prepared under the `tutorial` folder as `tutorial/tutorial.ipynb`. You may upload and run this ipython notebook on Google colab.
+
+# Prerequisites
+- Python 3.8 or above
+
+# Installation
+```bash
+$ pip install -U pip
+$ pip install -r requirements.txt
+```
+
+# Dataset & Preparation
+
+The image data, annotations and dataset.yaml are required. 
+
+## MS COCO
+
+Our training script accepts the MS COCO dataset. You may download the dataset using the following link:
+
+- Download [2017 MS COCO Dataset](https://cocodataset.org/#download) 
+
+## Custom Datasets
+
+You can also train the model on a custom dataset. 
+
+### Annotations Format
+After using a tool like [CVAT](https://github.com/openvinotoolkit/cvat), [makesense.ai](https://www.makesense.ai) or [Labelbox](https://labelbox.com) to label your images, export your labels to YOLO format, with one `*.txt` file per image (if no objects in image, no `*.txt` file is required). The `*.txt` file specifications are:
+
+- One row per object
+- Each row is `class x_center y_center width height` format.
+- Box coordinates must be in normalized xywh format (from 0 - 1). If your boxes are in pixels, divide `x_center` and `width` by image `width`, and `y_center` and `height` by image height.
+- Class numbers are zero-indexed (start from 0).
+
+<div align="center">
+<img src="./tutorial/screenshots/readme_img.jpg" width="50%" /> 
+</div>
+
+The label file corresponding to the above image contains 2 persons (class 0) and a tie (class 27):
+<div align="center">
+<img src="./tutorial/screenshots/readme_img2.png" width="40%" /> 
+</div>
+
+###  Directory Organization
+Your own datasets are expected to have the following structure. We assume `/dataset` is next to the `/yolov5` directory. YOLOv5 locates labels automatically for each image by replacing the last instance of `/images/` in each image path with `/labels/`.
+
+```bash
+- Dataset name
+    -- images
+        -- train
+            --- img001.jpg
+            --- ...
+        -- val
+            --- img002.jpg
+            --- ...
+    
+    -- labels
+        -- train
+            --- img001.txt
+            --- ...
+        -- val
+            --- img002.txt
+            --- ...
+
+- yolov5
+
+- generate_npy
+
+- exporting
+    
+```
+
+###  dataset.yaml
+
+The yaml file for COCO dataset has been prepared in `./data/coco.yaml`. For custom dataset, you need to prepare the yaml file and save it under `./data/`. The yaml file is expected to have the following format:
+```bash
+# train and val datasets (image directory or *.txt file with image paths)  
+train: ./datasets/images/train/  
+val: ./datasets/images/val/  
+
+# number of classes  
+nc: number of classes  
+
+# class names  
+names: list of class names
+
+```
+
+# Train
+
+For training on MS COCO, execute commands in the folder `yolov5`:
+```shell
+CUDA_VISIBLE_DEVICES='0' python train.py --data coco.yaml --cfg yolov5s-noupsample.yaml --weights '' --batch-size 64 
+```
+
+`CUDA_VISIBLE_DEVICES='0'` indicates the gpu ids.
+
+`--data` the yaml file. (located under `./data/`)
+
+`--cfg` the model configuration. (located under `./models/`) (`yolov5s-noupsample.yaml` for 520, `yolov5s.yaml` for 720)
+
+`--hyp` the path to hyperparameters file. (located under `./data/`)
+
+`--weights` the path to pretrained model weights. ('' if training from scratch)
+
+`--epochs` the number of epochs to train. (Default: 300)
+
+`--batch-size` batch size. (Default: 16)
+
+`--img-size` the input size of the model. (Default: (640, 640))
+
+`--workers` the maximum number of dataloader workers. (Default: 8)
+
+By default, the trained models are saved under `./runs/train/`.
+
+## Generating .npy for different model input
+We can generate `.npy` files for different model input sizes by using `yolov5_generate_npy.py`. Execute commands in the folder `generate_npy`:
+```shell
+python yolov5_generate_npy.py --input-h 640 --input-w 640 
+```
+
+`--input-h` the input height. (Default: 640)
+
+`--input-w` the input width. (Default: 640)
+
+We could get `*.npy`
+
+# Configure the paths yaml file
+You are expected to create a yaml file which stores all the paths related to the trained models. This yaml file will be used in the following sections. You can check and modify the `pretrained_paths_520.yaml` and `pretrained_paths_720.yaml` under `/yolov5/data/`. The yaml file is expected to contain the following information:
+
+```shell
+grid_dir: path_to_npy_file_directory
+grid20_path: path_to_grid20_npy_file
+grid40_path: path_to_grid40_npy_file
+grid80_path: path_to_grid80_npy_file
+
+yolov5_dir: path_to_yolov5_directory
+path: path_to_pretrained_yolov5_model_weights_pt_file
+yaml_path: path_to_the_model_configuration_yaml_file
+pt_path: path_to_export_yolov5_model_weights_kneron_supported_file
+onnx_export_file: path_to_export_yolov5_onnx_model_file
+
+input_w: model_input_width
+input_h: model_input_height
+
+nc: number_of_classes
+
+names: list_of_class_names
+```
+
+# Save and Convert to ONNX
+This section introduces how to save the trained model in a PyTorch 1.4-supported format and convert it to ONNX.
+
+## Exporting ONNX model in the PyTorch 1.7 environment
+We can convert the model to onnx by using `yolov5_export.py`. Execute commands in the folder `yolov5`:
+```shell
+python ../exporting/yolov5_export.py --data path_to_pretrained_path_yaml_file
+```
+
+`--data` the path to pretrained model paths yaml file (Default: ../yolov5/data/pretrained_paths_520.yaml)
+
+We could get onnx model. 
+
+
+## Converting onnx by tool chain
+Pull the latest [ONNX converter](https://github.com/kneron/ONNX_Convertor/tree/master/optimizer_scripts) from github. You may read the latest document from Github for converting ONNX model. Execute commands in the folder `ONNX_Convertor/optimizer_scripts`:
+(reference: https://github.com/kneron/ONNX_Convertor/tree/master/optimizer_scripts)
+
+```shell
+python -m onnxsim input_onnx_model output_onnx_model
+
+python pytorch2onnx.py input.pth output.onnx
+```
+
+We could get converted onnx model.
+
+
+# Inference
+
+Before running inference, we assume that the model has been converted to an ONNX model as in the previous section (even if you only run inference with the pth model). Create a yaml file containing the path information. For model inference on a single image, execute commands in the folder `yolov5`:
+```shell
+python inference.py --data path_to_pretrained_path_yaml_file --img-path path_to_image --save-path path_to_saved_image
+```
+
+`--img-path` the path to the image.
+
+`--save-path` the path to draw and save the image with bbox.
+
+`--data` the path to pretrained model paths yaml file. (Default: data/pretrained_paths_520.yaml)
+
+`--conf_thres` the score threshold of bounding boxes. (Default: 0.3)
+
+`--iou_thres` the iou threshold for NMS. (Default: 0.3) 
+
+`--onnx` whether to run inference with the ONNX model.
+
+You could find preprocessing and postprocessing processes under the folder `exporting/yolov5/`. 
+
+
+# Evaluation 
+
+## Evaluation Metric
+We will use mean Average Precision (mAP) for evaluation. You can find the script for computing mAP in `test.py`.
+
+`mAP`: mAP is the average of Average Precision (AP). AP summarizes a precision-recall curve as the weighted mean of precisions achieved at each threshold, with the increase in recall from the previous threshold used as the weight:
+
+<img src="https://latex.codecogs.com/svg.image?AP&space;=&space;\sum_n&space;(R_n-R_{n-1})P_n&space;" title="AP = \sum_n (R_n-R_{n-1})P_n " />
+
+where <img src="https://latex.codecogs.com/svg.image?R_n" title="R_n" />  and <img src="https://latex.codecogs.com/svg.image?P_n" title="P_n" /> are the recall and precision at the nth threshold. The mAP compares the ground-truth bounding box to the detected box and returns a score. The higher the score, the more accurate the model is in its detections.
+
+## Evaluation on a Dataset
+For evaluating the trained model on dataset:
+
+```shell
+python test.py --weights path_to_pth_model_weight --data path_to_data_yaml_file
+```
+
+`--weights` The path to pretrained model weight. (Default: best.pt)
+
+`--data` The path to data yaml file. (Default: data/coco128.yaml)
+
+`--img-size` Input shape of the model (Default: (640, 640))
+
+`--conf-thres` Object confidence threshold. (Default: 0.001)
+
+`--device` Cuda device, i.e. 0 or 0,1,2,3 or cpu. (Default: cpu)
+
+`--verbose` Whether report mAP by class.
+
+## End-to-End Evaluation
+If you would like to perform an end-to-end test with an image dataset, you can use `inference_e2e.py` under the directory `yolov5` to obtain the prediction results.
+You have to prepare an initial parameter yaml file for the inference runner. You may check `utils/init_params.yaml` for the format.
+```shell
+python inference_e2e.py --img-path path_to_dataset_folder --params path_to_init_params_file --save-path path_to_save_json_file
+```
+`--img-path` Path to the dataset directory
+
+`--params` Path to initial parameter yaml file for the inference runner
+
+`--save-path` Path to save the prediction to a json file
+
+`--gpu` GPU id  (-1 if cpu) (Default: -1)
+
+The predictions will be saved into a json file that has the following structure:
+```bash
+[
+    {'img_path': image_path_1,
+    'bbox': [[l,t,w,h,score,class_id], [l,t,w,h,score,class_id]]
+    },
+    {'img_path': image_path_2,
+    'bbox': [[l,t,w,h,score,class_id], [l,t,w,h,score,class_id]]
+    },
+    ...
+]
+```
+# Model
+
+Backbone | Input Size |  FPS on 520 | FPS on 720  | Model Size | mAP
+--- | --- |:---:|:---:|:---:|:---:
+[YOLOv5s (no upsample)](https://github.com/kneron/Model_Zoo/tree/main/detection/yolov5/yolov5s-noupsample) | 640x640 | 4.91429 | - | 13.1M | 40.4%
+[YOLOv5s (with upsample)](https://github.com/kneron/Model_Zoo/tree/main/detection/yolov5/yolov5s) | 640x640 | - | 24.4114 | 14.6M | 50.9%
+
+[YOLOv5s (no upsample)](https://github.com/kneron/Model_Zoo/tree/main/detection/yolov5/yolov5s-noupsample) is the yolov5s model backbone without upsampling operation, since 520 hardware does not support upsampling operation.
--- a/data/annapretrained_paths_520.yaml
+++ b/data/annapretrained_paths_520.yaml
@ -0,0 +1,17 @@
+grid_dir: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/
+grid20_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/20_640x640.npy
+grid40_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/40_640x640.npy
+grid80_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/80_640x640.npy
+
+
+yolov5_dir: C:/Users/rd_de/kneronyolov5/yolov5
+path: C:/Users/rd_de/kneronyolov5/yolov5/runs/train/exp59/weights/best.pt
+yaml_path: C:/Users/rd_de/kneronyolov5/yolov5/models/yolov5s-noupsample.yaml
+pt_path: C:/Users/rd_de/kneronyolov5/yolov5/weights/for520best.pt
+onnx_export_file: C:/Users/rd_de/kneronyolov5/yolov5/runs/train/exp59/weights/best.onnx
+
+input_w: 640
+input_h: 640
+
+nc: 1
+names: ['License Plate']
--- a/data/coco.yaml
+++ b/data/coco.yaml
@ -0,0 +1,33 @@
+# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+path: ../datasets/coco  # dataset root dir
+train: /home/ziyan/Dataset/COCO/coco/images/train2017/  # 118287 images
+val: /home/ziyan/Dataset/COCO/coco/images/val2017/  # 5000 images
+
+# number of classes
+nc: 80
+
+# class names
+names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+        'hair drier', 'toothbrush']
+
+# Download script/URL (optional)
+download: |
+  from utils.general import download, Path
+  # Download labels
+  segments = False  # segment or box labels
+  dir = Path(yaml['path'])  # dataset root dir
+  url = 'https://github.com/ultralytics/yolov5/releases/download/v1.0/'
+  urls = [url + ('coco2017labels-segments.zip' if segments else 'coco2017labels.zip')]  # labels
+  download(urls, dir=dir.parent)
+  # Download data
+  urls = ['http://images.cocodataset.org/zips/train2017.zip',  # 19G, 118k images
+          'http://images.cocodataset.org/zips/val2017.zip',  # 1G, 5k images
+          'http://images.cocodataset.org/zips/test2017.zip']  # 7G, 41k images (optional)
+  download(urls, dir=dir / 'images', threads=3)
--- a/data/coco128.yaml
+++ b/data/coco128.yaml
@ -0,0 +1,28 @@
+# COCO 2017 dataset http://cocodataset.org - first 128 training images
+# Train command: python train.py --data coco128.yaml
+# Default dataset location is next to /yolov5:
+#   /parent_folder
+#     /coco128
+#     /yolov5
+
+
+# download command/URL (optional)
+download: https://github.com/ultralytics/yolov5/releases/download/v1.0/coco128.zip
+
+# train and val data as 1) directory: path/images/, 2) file: path/images.txt, or 3) list: [path1/images/, path2/images/]
+train: ../coco128/images/train2017/  # 128 images
+val: ../coco128/images/train2017/  # 128 images
+
+# number of classes
+nc: 80
+
+# class names
+names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+        'hair drier', 'toothbrush']
--- a/data/custom.yaml
+++ b/data/custom.yaml
@ -0,0 +1,9 @@
+# train and val data as directory: path/images/
+train: ../image_data/images/train/ 
+val: ../image_data/images/val/  
+
+# number of classes
+nc: 3
+
+# class names
+names: ['air conditioner', 'dog', 'fence']
--- a/data/hyp.finetune.yaml
+++ b/data/hyp.finetune.yaml
@ -0,0 +1,38 @@
+# Hyperparameters for VOC finetuning
+# python train.py --batch 64 --weights yolov5m.pt --data voc.yaml --img 512 --epochs 50
+# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
+
+
+# Hyperparameter Evolution Results
+# Generations: 306
+#                   P         R     mAP.5 mAP.5:.95       box       obj       cls
+# Metrics:        0.6     0.936     0.896     0.684    0.0115   0.00805   0.00146
+
+lr0: 0.0032
+lrf: 0.12
+momentum: 0.843
+weight_decay: 0.00036
+warmup_epochs: 2.0
+warmup_momentum: 0.5
+warmup_bias_lr: 0.05
+box: 0.0296
+cls: 0.243
+cls_pw: 0.631
+obj: 0.301
+obj_pw: 0.911
+iou_t: 0.2
+anchor_t: 2.91
+# anchors: 3.63
+fl_gamma: 0.0
+hsv_h: 0.0138
+hsv_s: 0.664
+hsv_v: 0.464
+degrees: 0.373
+translate: 0.245
+scale: 0.898
+shear: 0.602
+perspective: 0.0
+flipud: 0.00856
+fliplr: 0.5
+mosaic: 1.0 #0.0
+mixup: 0.243
--- a/data/hyp.scratch.yaml
+++ b/data/hyp.scratch.yaml
@ -0,0 +1,33 @@
+# Hyperparameters for COCO training from scratch
+# python train.py --batch 40 --cfg yolov5m.yaml --weights '' --data coco.yaml --img 640 --epochs 300
+# See tutorials for hyperparameter evolution https://github.com/ultralytics/yolov5#tutorials
+
+
+lr0: 0.01  # initial learning rate (SGD=1E-2, Adam=1E-3)
+lrf: 0.2  # final OneCycleLR learning rate (lr0 * lrf)
+momentum: 0.937  # SGD momentum/Adam beta1
+weight_decay: 0.0005  # optimizer weight decay 5e-4
+warmup_epochs: 3.0  # warmup epochs (fractions ok)
+warmup_momentum: 0.8  # warmup initial momentum
+warmup_bias_lr: 0.1  # warmup initial bias lr
+box: 0.05  # box loss gain
+cls: 0.5  # cls loss gain
+cls_pw: 1.0  # cls BCELoss positive_weight
+obj: 1.0  # obj loss gain (scale with pixels)
+obj_pw: 1.0  # obj BCELoss positive_weight
+iou_t: 0.20  # IoU training threshold
+anchor_t: 4.0  # anchor-multiple threshold
+# anchors: 3  # anchors per output layer (0 to ignore)
+fl_gamma: 0.0  # focal loss gamma (efficientDet default gamma=1.5)
+hsv_h: 0.015  # image HSV-Hue augmentation (fraction)
+hsv_s: 0.7  # image HSV-Saturation augmentation (fraction)
+hsv_v: 0.4  # image HSV-Value augmentation (fraction)
+degrees: 0.0  # image rotation (+/- deg)
+translate: 0.1  # image translation (+/- fraction)
+scale: 0.5  # image scale (+/- gain)
+shear: 0.0  # image shear (+/- deg)
+perspective: 0.0  # image perspective (+/- fraction), range 0-0.001
+flipud: 0.0  # image flip up-down (probability)
+fliplr: 0.5  # image flip left-right (probability)
+mosaic: 1.0  # image mosaic (probability)
+mixup: 0.0  # image mixup (probability)
--- a/data/mepretrained_paths_520.yaml
+++ b/data/mepretrained_paths_520.yaml
@ -0,0 +1,21 @@
+grid_dir: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/
+grid20_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/20_640x640.npy
+grid40_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/40_640x640.npy
+grid80_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/80_640x640.npy
+
+
+yolov5_dir: C:/Users/rd_de/kneronyolov5/yolov5
+path: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp24/weights/best.pt
+yaml_path: C:/Users/rd_de/golfaceyolov5/yolov5/models/yolov5s-noupsample.yaml
+pt_path: C:/Users/rd_de/golfaceyolov5/yolov5/weights/for520best.pt
+onnx_export_file: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp24/weights/best.onnx
+
+input_w: 640
+input_h: 640
+
+nc: 4
+names: ['100', '1000', '50', '500']
+
+
+#nc: 6
+#names: ['Break circuit', 'bulge', 'foreign object', 'scratch', 'short circuit', 'white spot']
--- a/data/mepretrained_paths_630.yaml
+++ b/data/mepretrained_paths_630.yaml
@ -0,0 +1,17 @@
+grid_dir: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/
+grid20_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/20_640x640.npy
+grid40_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/40_640x640.npy
+grid80_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/80_640x640.npy
+
+
+yolov5_dir: C:/Users/rd_de/kneronyolov5/yolov5
+path: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp16/weights/best.pt
+yaml_path: C:/Users/rd_de/golfaceyolov5/yolov5/models/yolov5s.yaml
+pt_path: C:/Users/rd_de/golfaceyolov5/yolov5/weights/for720best.pt
+onnx_export_file: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp16/weights/best.onnx
+
+input_w: 640
+input_h: 640
+
+nc: 4
+names: ['car', 'greenery', 'person', 'tree']
--- a/data/mepretrained_paths_630class2.yaml
+++ b/data/mepretrained_paths_630class2.yaml
@ -0,0 +1,17 @@
+grid_dir: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/
+grid20_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/20_640x640.npy
+grid40_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/40_640x640.npy
+grid80_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/80_640x640.npy
+
+
+yolov5_dir: C:/Users/rd_de/kneronyolov5/yolov5
+path: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp13/weights/best.pt
+yaml_path: C:/Users/rd_de/golfaceyolov5/yolov5/models/yolov5s.yaml
+pt_path: C:/Users/rd_de/golfaceyolov5/yolov5/weights/for720best.pt
+onnx_export_file: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp13/weights/best.onnx
+
+input_w: 640
+input_h: 640
+
+nc: 2
+names: ['bunker', 'pond']
--- a/data/mepretrained_paths_630class8.yaml
+++ b/data/mepretrained_paths_630class8.yaml
@ -0,0 +1,16 @@
+grid_dir: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/
+grid20_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/20_640x640.npy
+grid40_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/40_640x640.npy
+grid80_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/80_640x640.npy
+
+
+yolov5_dir: C:/Users/rd_de/kneronyolov5/yolov5
+path: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp29/weights/best.pt
+yaml_path: C:/Users/rd_de/golfaceyolov5/yolov5/models/yolov5s.yaml
+pt_path: C:/Users/rd_de/golfaceyolov5/yolov5/weights/for720best.pt
+onnx_export_file: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp29/weights/best.onnx
+
+nc: 8
+names: ['bunker', 'car', 'grass', 'greenery', 'person', 'pond', 'road', 'tree']
+input_w: 640
+input_h: 640
--- a/data/mepretrained_paths_720.yaml
+++ b/data/mepretrained_paths_720.yaml
@ -0,0 +1,17 @@
+grid_dir: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/
+grid20_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/20_640x640.npy
+grid40_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/40_640x640.npy
+grid80_path: C:/Users/rd_de/kneronyolov5/ai_training/detection/yolov5/generate_npy/80_640x640.npy
+
+
+yolov5_dir: C:/Users/rd_de/kneronyolov5/yolov5
+path: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp19/weights/best.pt
+yaml_path: C:/Users/rd_de/golfaceyolov5/yolov5/models/yolov5s.yaml
+pt_path: C:/Users/rd_de/golfaceyolov5/yolov5/weights/for720best.pt
+onnx_export_file: C:/Users/rd_de/golfaceyolov5/yolov5/runs/train/exp19/weights/best.onnx
+
+input_w: 640
+input_h: 640
+
+nc: 4
+names: ['car', 'greenery', 'person', 'tree']
--- a/data/model_paths_520_coco128.yaml
+++ b/data/model_paths_520_coco128.yaml
@ -0,0 +1,25 @@
+grid_dir: ../generate_npy/
+grid20_path: ../generate_npy/20_640x640.npy
+grid40_path: ../generate_npy/40_640x640.npy
+grid80_path: ../generate_npy/80_640x640.npy
+
+yolov5_dir: ./
+path: ./runs/train/exp/weights/best.pt
+yaml_path: ./models/yolov5s-noupsample.yaml
+pt_path: ./yolov5s-noupsample-coco128.pt   # pytorch 1.4
+onnx_export_file: ./yolov5s-noupsample-coco128.onnx
+
+input_w: 640
+input_h: 640
+# number of classes
+nc: 80
+# class names
+names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+        'hair drier', 'toothbrush']
--- a/data/pretrained_paths_520.yaml
+++ b/data/pretrained_paths_520.yaml
@ -0,0 +1,25 @@
+grid_dir: ../generate_npy/
+grid20_path: ../generate_npy/20_640x640.npy
+grid40_path: ../generate_npy/40_640x640.npy
+grid80_path: ../generate_npy/80_640x640.npy
+
+yolov5_dir: ./
+path: ./best.pt
+yaml_path: ./models/yolov5s-noupsample.yaml
+pt_path: ./yolov5s-noupsample.pt   # pytorch 1.4
+onnx_export_file: ./yolov5s-noupsample.onnx
+
+input_w: 640
+input_h: 640
+# number of classes
+nc: 80
+# class names
+names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+        'hair drier', 'toothbrush']
--- a/data/pretrained_paths_720.yaml
+++ b/data/pretrained_paths_720.yaml
@ -0,0 +1,25 @@
+grid_dir: ../generate_npy/
+grid20_path: ../generate_npy/20_640x640.npy
+grid40_path: ../generate_npy/40_640x640.npy
+grid80_path: ../generate_npy/80_640x640.npy
+
+yolov5_dir: ./
+path: ./best.pt
+yaml_path: ./models/yolov5s.yaml
+pt_path: ./yolov5s.pt   # pytorch 1.4
+onnx_export_file: ./yolov5s.onnx
+
+input_w: 640
+input_h: 640
+# number of classes
+nc: 80
+# class names
+names: ['person', 'bicycle', 'car', 'motorcycle', 'airplane', 'bus', 'train', 'truck', 'boat', 'traffic light',
+        'fire hydrant', 'stop sign', 'parking meter', 'bench', 'bird', 'cat', 'dog', 'horse', 'sheep', 'cow',
+        'elephant', 'bear', 'zebra', 'giraffe', 'backpack', 'umbrella', 'handbag', 'tie', 'suitcase', 'frisbee',
+        'skis', 'snowboard', 'sports ball', 'kite', 'baseball bat', 'baseball glove', 'skateboard', 'surfboard',
+        'tennis racket', 'bottle', 'wine glass', 'cup', 'fork', 'knife', 'spoon', 'bowl', 'banana', 'apple',
+        'sandwich', 'orange', 'broccoli', 'carrot', 'hot dog', 'pizza', 'donut', 'cake', 'chair', 'couch',
+        'potted plant', 'bed', 'dining table', 'toilet', 'tv', 'laptop', 'mouse', 'remote', 'keyboard', 'cell phone',
+        'microwave', 'oven', 'toaster', 'sink', 'refrigerator', 'book', 'clock', 'vase', 'scissors', 'teddy bear',
+        'hair drier', 'toothbrush']
--- a/data/scripts/get_coco.sh
+++ b/data/scripts/get_coco.sh
@ -0,0 +1,24 @@
+#!/bin/bash
+# COCO 2017 dataset http://cocodataset.org
+# Download command: bash data/scripts/get_coco.sh
+# Train command: python train.py --data coco.yaml
+# Default dataset location is next to /yolov5:
+#   /parent_folder
+#     /coco
+#     /yolov5
+
+# Download/unzip labels
+d='../' # unzip directory
+url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
+f='coco2017labels.zip'                                                                 # 68 MB
+echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
+
+# Download/unzip images
+d='../coco/images' # unzip directory
+url=http://images.cocodataset.org/zips/
+f1='train2017.zip' # 19G, 118k images
+f2='val2017.zip'   # 1G, 5k images
+f3='test2017.zip'  # 7G, 41k images (optional)
+for f in $f1 $f2; do
+  echo 'Downloading' $url$f ' ...' && curl -L $url$f -o $f && unzip -q $f -d $d && rm $f # download, unzip, remove
+done
--- a/data/scripts/get_voc.sh
+++ b/data/scripts/get_voc.sh
@ -0,0 +1,137 @@
+#!/bin/bash
+# PASCAL VOC dataset http://host.robots.ox.ac.uk/pascal/VOC/
+# Download command: bash data/scripts/get_voc.sh
+# Train command: python train.py --data voc.yaml
+# Default dataset location is next to /yolov5:
+#   /parent_folder
+#     /VOC
+#     /yolov5
+
+start=$(date +%s)
+mkdir -p ../tmp
+# Abort if the scratch directory cannot be entered: everything below downloads into the
+# current directory and the script later removes ../tmp.
+cd ../tmp/ || exit 1
+
+# Download/unzip images and labels
+d='.' # unzip directory
+url=https://github.com/ultralytics/yolov5/releases/download/v1.0/
+f1=VOCtrainval_06-Nov-2007.zip # 446MB, 5012 images
+f2=VOCtest_06-Nov-2007.zip     # 438MB, 4953 images
+f3=VOCtrainval_11-May-2012.zip # 1.95GB, 17126 images
+for f in "$f1" "$f2" "$f3"; do
+  echo "Downloading $url$f  ..." && curl -L "$url$f" -o "$f" && unzip -q "$f" -d "$d" && rm "$f" # download, unzip, remove
+done
+
+# Report how long the download/unzip phase took
+end=$(date +%s)
+runtime=$((end - start))
+echo "Completed in" $runtime "seconds"
+
+echo "Splitting dataset..."
+python3 - "$@" <<END
+import xml.etree.ElementTree as ET
+import pickle
+import os
+from os import listdir, getcwd
+from os.path import join
+
+sets=[('2012', 'train'), ('2012', 'val'), ('2007', 'train'), ('2007', 'val'), ('2007', 'test')]
+
+classes = ["aeroplane", "bicycle", "bird", "boat", "bottle", "bus", "car", "cat", "chair", "cow", "diningtable", "dog", "horse", "motorbike", "person", "pottedplant", "sheep", "sofa", "train", "tvmonitor"]
+
+
+def convert(size, box):
+    dw = 1./(size[0])
+    dh = 1./(size[1])
+    x = (box[0] + box[1])/2.0 - 1
+    y = (box[2] + box[3])/2.0 - 1
+    w = box[1] - box[0]
+    h = box[3] - box[2]
+    x = x*dw
+    w = w*dw
+    y = y*dh
+    h = h*dh
+    return (x,y,w,h)
+
+def convert_annotation(year, image_id):
+    in_file = open('VOCdevkit/VOC%s/Annotations/%s.xml'%(year, image_id))
+    out_file = open('VOCdevkit/VOC%s/labels/%s.txt'%(year, image_id), 'w')
+    tree=ET.parse(in_file)
+    root = tree.getroot()
+    size = root.find('size')
+    w = int(size.find('width').text)
+    h = int(size.find('height').text)
+
+    for obj in root.iter('object'):
+        difficult = obj.find('difficult').text
+        cls = obj.find('name').text
+        if cls not in classes or int(difficult)==1:
+            continue
+        cls_id = classes.index(cls)
+        xmlbox = obj.find('bndbox')
+        b = (float(xmlbox.find('xmin').text), float(xmlbox.find('xmax').text), float(xmlbox.find('ymin').text), float(xmlbox.find('ymax').text))
+        bb = convert((w,h), b)
+        out_file.write(str(cls_id) + " " + " ".join([str(a) for a in bb]) + '\n')
+
+wd = getcwd()
+
+for year, image_set in sets:
+    if not os.path.exists('VOCdevkit/VOC%s/labels/'%(year)):
+        os.makedirs('VOCdevkit/VOC%s/labels/'%(year))
+    image_ids = open('VOCdevkit/VOC%s/ImageSets/Main/%s.txt'%(year, image_set)).read().strip().split()
+    list_file = open('%s_%s.txt'%(year, image_set), 'w')
+    for image_id in image_ids:
+        list_file.write('%s/VOCdevkit/VOC%s/JPEGImages/%s.jpg\n'%(wd, year, image_id))
+        convert_annotation(year, image_id)
+    list_file.close()
+
+END
+
+cat 2007_train.txt 2007_val.txt 2012_train.txt 2012_val.txt >train.txt
+cat 2007_train.txt 2007_val.txt 2007_test.txt 2012_train.txt 2012_val.txt >train.all.txt
+
+python3 - "$@" <<END
+
+import shutil
+import os
+os.system('mkdir ../VOC/')
+os.system('mkdir ../VOC/images')
+os.system('mkdir ../VOC/images/train')
+os.system('mkdir ../VOC/images/val')
+
+os.system('mkdir ../VOC/labels')
+os.system('mkdir ../VOC/labels/train')
+os.system('mkdir ../VOC/labels/val')
+
+import os
+print(os.path.exists('../tmp/train.txt'))
+f = open('../tmp/train.txt', 'r')
+lines = f.readlines()
+
+for line in lines:
+    line = "/".join(line.split('/')[-5:]).strip()
+    if (os.path.exists("../" + line)):
+        os.system("cp ../"+ line + " ../VOC/images/train")
+        
+    line = line.replace('JPEGImages', 'labels')
+    line = line.replace('jpg', 'txt')
+    if (os.path.exists("../" + line)):
+        os.system("cp ../"+ line + " ../VOC/labels/train")
+
+
+print(os.path.exists('../tmp/2007_test.txt'))
+f = open('../tmp/2007_test.txt', 'r')
+lines = f.readlines()
+
+for line in lines:
+    line = "/".join(line.split('/')[-5:]).strip()
+    if (os.path.exists("../" + line)):
+        os.system("cp ../"+ line + " ../VOC/images/val")
+        
+    line = line.replace('JPEGImages', 'labels')
+    line = line.replace('jpg', 'txt')
+    if (os.path.exists("../" + line)):
+        os.system("cp ../"+ line + " ../VOC/labels/val")
+
+END
+
+rm -rf ../tmp # remove temporary directory
+echo "VOC download done."
--- a/datasets/test/images/765706588_040624_mp4-0001_jpg.rf.e5e135b768bfdd6fc7672c6f72939863.jpg
+++ b/datasets/test/images/765706588_040624_mp4-0001_jpg.rf.e5e135b768bfdd6fc7672c6f72939863.jpg
--- a/datasets/test/images/frame_00124_jpg.rf.a7b6f14f61fe01b4978fadaaa0408002.jpg
+++ b/datasets/test/images/frame_00124_jpg.rf.a7b6f14f61fe01b4978fadaaa0408002.jpg
--- a/datasets/test/images/frame_00235_jpg.rf.4247e072bd0c90c04ca47f90af72087c.jpg
+++ b/datasets/test/images/frame_00235_jpg.rf.4247e072bd0c90c04ca47f90af72087c.jpg
--- a/datasets/test/images/frame_00237_jpg.rf.9c6a3860b1e0af420c128366dc45f729.jpg
+++ b/datasets/test/images/frame_00237_jpg.rf.9c6a3860b1e0af420c128366dc45f729.jpg
--- a/datasets/test/images/frame_01255_jpg.rf.8a40b37a20ad9853f0fc94596c1ea3da.jpg
+++ b/datasets/test/images/frame_01255_jpg.rf.8a40b37a20ad9853f0fc94596c1ea3da.jpg
--- a/datasets/test/images/frame_01263_jpg.rf.3335d5da5dd3f10aabaf65f8c1dbb3cb.jpg
+++ b/datasets/test/images/frame_01263_jpg.rf.3335d5da5dd3f10aabaf65f8c1dbb3cb.jpg
--- a/datasets/test/images/frame_01270_jpg.rf.cc45cf417809fcf171fb970ed14c1c4c.jpg
+++ b/datasets/test/images/frame_01270_jpg.rf.cc45cf417809fcf171fb970ed14c1c4c.jpg
--- a/datasets/test/images/frame_01399_jpg.rf.fb73708d42132080535669f0bb37e876.jpg
+++ b/datasets/test/images/frame_01399_jpg.rf.fb73708d42132080535669f0bb37e876.jpg
--- a/datasets/test/images/images_0013_jpg.rf.868fc3ea1dc01309dd13f328c984b257.jpg
+++ b/datasets/test/images/images_0013_jpg.rf.868fc3ea1dc01309dd13f328c984b257.jpg
--- a/datasets/test/images/images_0017_jpg.rf.39ec10af4161c51c195cb10a0c9db28e.jpg
+++ b/datasets/test/images/images_0017_jpg.rf.39ec10af4161c51c195cb10a0c9db28e.jpg
--- a/datasets/test/labels/765706588_040624_mp4-0001_jpg.rf.e5e135b768bfdd6fc7672c6f72939863.txt
+++ b/datasets/test/labels/765706588_040624_mp4-0001_jpg.rf.e5e135b768bfdd6fc7672c6f72939863.txt
@ -0,0 +1,3 @@
+0 0.3703125 0.66875 0.1203125 0.0109375
+1 0.6375 0.73046875 0.15625 0.14140625
+1 0.41875 0.76875 0.28828125 0.0765625
--- a/datasets/test/labels/frame_00124_jpg.rf.a7b6f14f61fe01b4978fadaaa0408002.txt
+++ b/datasets/test/labels/frame_00124_jpg.rf.a7b6f14f61fe01b4978fadaaa0408002.txt
@ -0,0 +1,2 @@
+0 0.771875 0.50859375 0.03203125 0.0703125
+0 0.71484375 0.46171875 0.05 0.0515625
--- a/datasets/test/labels/frame_00235_jpg.rf.4247e072bd0c90c04ca47f90af72087c.txt
+++ b/datasets/test/labels/frame_00235_jpg.rf.4247e072bd0c90c04ca47f90af72087c.txt
@ -0,0 +1,3 @@
+0 0.02265625 0.95078125 0.0453125 0.0953125
+0 0.09375 0.7875 0.0328125 0.07421875
+0 0.15703125 0.68046875 0.04921875 0.090625
--- a/datasets/test/labels/frame_00237_jpg.rf.9c6a3860b1e0af420c128366dc45f729.txt
+++ b/datasets/test/labels/frame_00237_jpg.rf.9c6a3860b1e0af420c128366dc45f729.txt
@ -0,0 +1,2 @@
+0 0.0203125 0.92109375 0.040625 0.1578125
+0 0.1046875 0.73046875 0.0609375 0.07421875
--- a/datasets/test/labels/frame_01255_jpg.rf.8a40b37a20ad9853f0fc94596c1ea3da.txt
+++ b/datasets/test/labels/frame_01255_jpg.rf.8a40b37a20ad9853f0fc94596c1ea3da.txt
@ -0,0 +1,2 @@
+1 0.590625 0.2671875 0.0546875 0.03125
+1 0.6546875 0.28359375 0.06875 0.03046875
--- a/datasets/test/labels/frame_01263_jpg.rf.3335d5da5dd3f10aabaf65f8c1dbb3cb.txt
+++ b/datasets/test/labels/frame_01263_jpg.rf.3335d5da5dd3f10aabaf65f8c1dbb3cb.txt
@ -0,0 +1,2 @@
+1 0.67265625 0.31640625 0.1453125 0.09765625
+1 0.81484375 0.38359375 0.12421875 0.11015625
--- a/datasets/test/labels/frame_01270_jpg.rf.cc45cf417809fcf171fb970ed14c1c4c.txt
+++ b/datasets/test/labels/frame_01270_jpg.rf.cc45cf417809fcf171fb970ed14c1c4c.txt
@ -0,0 +1,3 @@
+1 0.8921875 0.42734375 0.0265625 0.03671875
+1 0.94609375 0.47578125 0.02890625 0.05703125
+1 0.98671875 0.53046875 0.025 0.05546875
--- a/datasets/test/labels/frame_01399_jpg.rf.fb73708d42132080535669f0bb37e876.txt
+++ b/datasets/test/labels/frame_01399_jpg.rf.fb73708d42132080535669f0bb37e876.txt
@ -0,0 +1,3 @@
+1 0.80078125 0.45234375 0.115625 0.07890625
+1 0.93671875 0.53359375 0.1265625 0.1671875
+1 0.678125 0.41171875 0.03125 0.0265625
--- a/datasets/test/labels/images_0013_jpg.rf.868fc3ea1dc01309dd13f328c984b257.txt
+++ b/datasets/test/labels/images_0013_jpg.rf.868fc3ea1dc01309dd13f328c984b257.txt
@ -0,0 +1 @@
+1 0.6609375 0.4421875 0.03046875 0.02265625
--- a/datasets/test/labels/images_0017_jpg.rf.39ec10af4161c51c195cb10a0c9db28e.txt
+++ b/datasets/test/labels/images_0017_jpg.rf.39ec10af4161c51c195cb10a0c9db28e.txt
@ -0,0 +1 @@
+1 0.5234375 0.390625 0.0328125 0.01640625
--- a/detect.py
+++ b/detect.py
@ -0,0 +1,172 @@
+import argparse
+import time
+from pathlib import Path
+
+import cv2
+import torch
+import torch.backends.cudnn as cudnn
+from numpy import random
+
+from models.experimental import attempt_load
+from utils.datasets import LoadStreams, LoadImages
+from utils.general import check_img_size, non_max_suppression, apply_classifier, scale_coords, xyxy2xywh, \
+    strip_optimizer, set_logging, increment_path
+from utils.plots import plot_one_box
+from utils.torch_utils import select_device, load_classifier, time_synchronized
+
+
+def detect(save_img=False):
+    source, weights, view_img, save_txt, imgsz = opt.source, opt.weights, opt.view_img, opt.save_txt, opt.img_size
+    webcam = source.isnumeric() or source.endswith('.txt') or source.lower().startswith(
+        ('rtsp://', 'rtmp://', 'http://'))
+
+    # Directories
+    save_dir = Path(increment_path(Path(opt.project) / opt.name, exist_ok=opt.exist_ok))  # increment run
+    (save_dir / 'labels' if save_txt else save_dir).mkdir(parents=True, exist_ok=True)  # make dir
+
+    # Initialize
+    set_logging()
+    device = select_device(opt.device)
+    half = device.type != 'cpu'  # half precision only supported on CUDA
+
+    # Load model
+    model = attempt_load(weights, map_location=device)  # load FP32 model
+    imgsz = check_img_size(imgsz, s=model.stride.max())  # check img_size
+    if half:
+        model.half()  # to FP16
+
+    # Second-stage classifier
+    classify = False
+    if classify:
+        modelc = load_classifier(name='resnet101', n=2)  # initialize
+        modelc.load_state_dict(torch.load('weights/resnet101.pt', map_location=device)['model']).to(device).eval()
+
+    # Set Dataloader
+    vid_path, vid_writer = None, None
+    if webcam:
+        view_img = True
+        cudnn.benchmark = True  # set True to speed up constant image size inference
+        dataset = LoadStreams(source, img_size=imgsz)
+    else:
+        save_img = True
+        dataset = LoadImages(source, img_size=imgsz)
+
+    # Get names and colors
+    names = model.module.names if hasattr(model, 'module') else model.names
+    colors = [[random.randint(0, 255) for _ in range(3)] for _ in names]
+
+    # Run inference
+    t0 = time.time()
+    img = torch.zeros((1, 3, imgsz, imgsz), device=device)  # init img
+    _ = model(img.half() if half else img) if device.type != 'cpu' else None  # run once
+    for path, img, im0s, vid_cap in dataset:
+        img = torch.from_numpy(img).to(device)
+        img = img.half() if half else img.float()  # uint8 to fp16/32
+        img /= 255.0  # 0 - 255 to 0.0 - 1.0
+        if img.ndimension() == 3:
+            img = img.unsqueeze(0)
+
+        # Inference
+        t1 = time_synchronized()
+        pred = model(img, augment=opt.augment)[0]
+
+        # Apply NMS
+        pred = non_max_suppression(pred, opt.conf_thres, opt.iou_thres, classes=opt.classes, agnostic=opt.agnostic_nms)
+        t2 = time_synchronized()
+
+        # Apply Classifier
+        if classify:
+            pred = apply_classifier(pred, modelc, img, im0s)
+
+        # Process detections
+        for i, det in enumerate(pred):  # detections per image
+            if webcam:  # batch_size >= 1
+                p, s, im0 = Path(path[i]), '%g: ' % i, im0s[i].copy()
+            else:
+                p, s, im0 = Path(path), '', im0s
+
+            save_path = str(save_dir / p.name)
+            txt_path = str(save_dir / 'labels' / p.stem) + ('_%g' % dataset.frame if dataset.mode == 'video' else '')
+            s += '%gx%g ' % img.shape[2:]  # print string
+            gn = torch.tensor(im0.shape)[[1, 0, 1, 0]]  # normalization gain whwh
+            if len(det):
+                # Rescale boxes from img_size to im0 size
+                det[:, :4] = scale_coords(img.shape[2:], det[:, :4], im0.shape).round()
+
+                # Print results
+                for c in det[:, -1].unique():
+                    n = (det[:, -1] == c).sum()  # detections per class
+                    s += '%g %ss, ' % (n, names[int(c)])  # add to string
+
+                # Write results
+                for *xyxy, conf, cls in reversed(det):
+                    if save_txt:  # Write to file
+                        xywh = (xyxy2xywh(torch.tensor(xyxy).view(1, 4)) / gn).view(-1).tolist()  # normalized xywh
+                        line = (cls, *xywh, conf) if opt.save_conf else (cls, *xywh)  # label format
+                        with open(txt_path + '.txt', 'a') as f:
+                            f.write(('%g ' * len(line)).rstrip() % line + '\n')
+
+                    if save_img or view_img:  # Add bbox to image
+                        label = '%s %.2f' % (names[int(cls)], conf)
+                        plot_one_box(xyxy, im0, label=label, color=colors[int(cls)], line_thickness=3)
+
+            # Print time (inference + NMS)
+            print('%sDone. (%.3fs)' % (s, t2 - t1))
+
+            # Stream results
+            if view_img:
+                cv2.imshow(p, im0)
+                if cv2.waitKey(1) == ord('q'):  # q to quit
+                    raise StopIteration
+
+            # Save results (image with detections)
+            if save_img:
+                if dataset.mode == 'images':
+                    cv2.imwrite(save_path, im0)
+                else:
+                    if vid_path != save_path:  # new video
+                        vid_path = save_path
+                        if isinstance(vid_writer, cv2.VideoWriter):
+                            vid_writer.release()  # release previous video writer
+
+                        fourcc = 'mp4v'  # output video codec
+                        fps = vid_cap.get(cv2.CAP_PROP_FPS)
+                        w = int(vid_cap.get(cv2.CAP_PROP_FRAME_WIDTH))
+                        h = int(vid_cap.get(cv2.CAP_PROP_FRAME_HEIGHT))
+                        vid_writer = cv2.VideoWriter(save_path, cv2.VideoWriter_fourcc(*fourcc), fps, (w, h))
+                    vid_writer.write(im0)
+
+    if save_txt or save_img:
+        print('Results saved to %s' % save_dir)
+
+    print('Done. (%.3fs)' % (time.time() - t0))
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', nargs='+', type=str, default='yolov5s.pt', help='model.pt path(s)')
+    parser.add_argument('--source', type=str, default='data/images', help='source')  # file/folder, 0 for webcam
+    parser.add_argument('--img-size', type=int, default=640, help='inference size (pixels)')
+    parser.add_argument('--conf-thres', type=float, default=0.25, help='object confidence threshold')
+    parser.add_argument('--iou-thres', type=float, default=0.45, help='IOU threshold for NMS')
+    parser.add_argument('--device', default='', help='cuda device, i.e. 0 or 0,1,2,3 or cpu')
+    parser.add_argument('--view-img', action='store_true', help='display results')
+    parser.add_argument('--save-txt', action='store_true', help='save results to *.txt')
+    parser.add_argument('--save-conf', action='store_true', help='save confidences in --save-txt labels')
+    parser.add_argument('--classes', nargs='+', type=int, help='filter by class: --class 0, or --class 0 2 3')
+    parser.add_argument('--agnostic-nms', action='store_true', help='class-agnostic NMS')
+    parser.add_argument('--augment', action='store_true', help='augmented inference')
+    parser.add_argument('--update', action='store_true', help='update all models')
+    parser.add_argument('--project', default='runs/detect', help='save results to project/name')
+    parser.add_argument('--name', default='exp', help='save results to project/name')
+    parser.add_argument('--exist-ok', action='store_true', help='existing project/name ok, do not increment')
+    opt = parser.parse_args()
+    print(opt)
+
+    with torch.no_grad():
+        if opt.update:  # update all models (to fix SourceChangeWarning)
+            for opt.weights in ['yolov5s.pt', 'yolov5m.pt', 'yolov5l.pt', 'yolov5x.pt']:
+                detect()
+                strip_optimizer(opt.weights)
+        else:
+            detect()
--- a/export.py
+++ b/export.py
@ -0,0 +1,94 @@
+"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
+
+Usage:
+    $ export PYTHONPATH="$PWD" && python export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
+"""
+
+import argparse
+import sys
+import time
+
+sys.path.append('./')  # to run '$ python *.py' files in subdirectories
+
+import torch
+import torch.nn as nn
+
+import models
+from models.experimental import attempt_load
+from utils.activations import Hardswish
+from utils.general import set_logging, check_img_size
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')  # from yolov5/models/
+    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
+    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
+    opt = parser.parse_args()
+    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
+    print(opt)
+    set_logging()
+    t = time.time()
+
+    # Load PyTorch model
+    model = attempt_load(opt.weights, map_location=torch.device('cpu'))  # load FP32 model
+    labels = model.names
+
+    # Checks
+    gs = int(max(model.stride))  # grid size (max stride)
+    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples
+
+    # Input
+    img = torch.zeros(opt.batch_size, 3, *opt.img_size)  # image size(1,3,320,192) iDetection
+
+    # Update model
+    for k, m in model.named_modules():
+        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+        if isinstance(m, models.common.Conv) and isinstance(m.act, nn.Hardswish):
+            m.act = Hardswish()  # assign activation
+        # if isinstance(m, models.yolo.Detect):
+        #     m.forward = m.forward_export  # assign forward (optional)
+    model.model[-1].export = True  # set Detect() layer export=True
+    y = model(img)  # dry run
+
+    # TorchScript export
+    try:
+        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
+        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
+        ts = torch.jit.trace(model, img)
+        ts.save(f)
+        print('TorchScript export success, saved as %s' % f)
+    except Exception as e:
+        print('TorchScript export failure: %s' % e)
+
+    # ONNX export
+    try:
+        import onnx
+
+        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+        f = opt.weights.replace('.pt', '.onnx')  # filename
+        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
+                          output_names=['classes', 'boxes'] if y is None else ['output'])
+
+        # Checks
+        onnx_model = onnx.load(f)  # load onnx model
+        onnx.checker.check_model(onnx_model)  # check onnx model
+        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
+        print('ONNX export success, saved as %s' % f)
+    except Exception as e:
+        print('ONNX export failure: %s' % e)
+
+    # CoreML export
+    try:
+        import coremltools as ct
+
+        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
+        # convert model from torchscript and apply pixel scaling as per detect.py
+        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
+        f = opt.weights.replace('.pt', '.mlmodel')  # filename
+        model.save(f)
+        print('CoreML export success, saved as %s' % f)
+    except Exception as e:
+        print('CoreML export failure: %s' % e)
+
+    # Finish
+    print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
--- a/hubconf.py
+++ b/hubconf.py
@ -0,0 +1,119 @@
+"""File for accessing YOLOv5 via PyTorch Hub https://pytorch.org/hub/
+
+Usage:
+    import torch
+    model = torch.hub.load('ultralytics/yolov5', 'yolov5s', pretrained=True, channels=3, classes=80)
+"""
+
+from pathlib import Path
+
+import torch
+
+from models.yolo import Model
+from utils.general import set_logging
+from utils.google_utils import attempt_download
+
+dependencies = ['torch', 'yaml']
+set_logging()
+
+
+def create(name, pretrained, channels, classes):
+    """Creates a specified YOLOv5 model
+
+    Arguments:
+        name (str): name of model, i.e. 'yolov5s'
+        pretrained (bool): load pretrained weights into the model
+        channels (int): number of input channels
+        classes (int): number of model classes
+
+    Returns:
+        pytorch model
+    """
+    config = Path(__file__).parent / 'models' / f'{name}.yaml'  # model.yaml path
+    try:
+        model = Model(config, channels, classes)
+        if pretrained:
+            fname = f'{name}.pt'  # checkpoint filename
+            attempt_download(fname)  # download if not found locally
+            ckpt = torch.load(fname, map_location=torch.device('cpu'))  # load
+            state_dict = ckpt['model'].float().state_dict()  # to FP32
+            state_dict = {k: v for k, v in state_dict.items() if model.state_dict()[k].shape == v.shape}  # filter
+            model.load_state_dict(state_dict, strict=False)  # load
+            if len(ckpt['model'].names) == classes:
+                model.names = ckpt['model'].names  # set class names attribute
+            # model = model.autoshape()  # for PIL/cv2/np inputs and NMS
+        return model
+
+    except Exception as e:
+        help_url = 'https://github.com/ultralytics/yolov5/issues/36'
+        s = 'Cache maybe be out of date, try force_reload=True. See %s for help.' % help_url
+        raise Exception(s) from e
+
+
+def yolov5s(pretrained=False, channels=3, classes=80):
+    """YOLOv5-small model from https://github.com/ultralytics/yolov5
+
+    Arguments:
+        pretrained (bool): load pretrained weights into the model, default=False
+        channels (int): number of input channels, default=3
+        classes (int): number of model classes, default=80
+
+    Returns:
+        pytorch model
+    """
+    return create('yolov5s', pretrained, channels, classes)
+
+
+def yolov5m(pretrained=False, channels=3, classes=80):
+    """YOLOv5-medium model from https://github.com/ultralytics/yolov5
+
+    Arguments:
+        pretrained (bool): load pretrained weights into the model, default=False
+        channels (int): number of input channels, default=3
+        classes (int): number of model classes, default=80
+
+    Returns:
+        pytorch model
+    """
+    return create('yolov5m', pretrained, channels, classes)
+
+
+def yolov5l(pretrained=False, channels=3, classes=80):
+    """YOLOv5-large model from https://github.com/ultralytics/yolov5
+
+    Arguments:
+        pretrained (bool): load pretrained weights into the model, default=False
+        channels (int): number of input channels, default=3
+        classes (int): number of model classes, default=80
+
+    Returns:
+        pytorch model
+    """
+    return create('yolov5l', pretrained, channels, classes)
+
+
+def yolov5x(pretrained=False, channels=3, classes=80):
+    """YOLOv5-xlarge model from https://github.com/ultralytics/yolov5
+
+    Arguments:
+        pretrained (bool): load pretrained weights into the model, default=False
+        channels (int): number of input channels, default=3
+        classes (int): number of model classes, default=80
+
+    Returns:
+        pytorch model
+    """
+    return create('yolov5x', pretrained, channels, classes)
+
+
+if __name__ == '__main__':
+    model = create(name='yolov5s', pretrained=True, channels=3, classes=80)  # example
+    model = model.fuse().autoshape()  # for PIL/cv2/np inputs and NMS
+
+    # Verify inference
+    from PIL import Image
+
+    imgs = [Image.open(x) for x in Path('data/images').glob('*.jpg')]
+    results = model(imgs)
+    results.show()
+    results.print()
--- a/kneron/bie2nef.py
+++ b/kneron/bie2nef.py
@ -0,0 +1,44 @@
+import ktc
+import os
+import shutil
+import subprocess
+
+# Directory that holds the `.bie` input and receives the `.nef` output
+onnx_dir = "runs/train/exp24/weights/"
+bie_file = os.path.join(onnx_dir, "input.kdp720.scaled.bie")  # make sure the `.bie` path is correct
+
+# Make sure the `.bie` file exists before invoking the toolchain
+if not os.path.exists(bie_file):
+    raise FileNotFoundError(f"❌ Error: BIE file not found at {bie_file}")
+
+print(f"✅ Found BIE file: {bie_file}")
+
+# Initialize ModelConfig
+# NOTE(review): the positional arguments look like model id, version and target platform - confirm against the ktc docs
+km = ktc.ModelConfig(20008, "0001", "720", bie_path=bie_file)
+
+# Run the `.nef` conversion
+nef_model_path = ktc.compile([km])
+
+# Print the path of the generated `.nef`
+print(f"🔍 Generated NEF file at: {nef_model_path}")
+
+# Make sure the `.nef` conversion succeeded
+if not nef_model_path or not os.path.exists(nef_model_path):
+    raise RuntimeError(f"❌ Error: NEF model was not generated at {nef_model_path}")
+
+# Make sure the target folder exists
+os.makedirs(onnx_dir, exist_ok=True)
+
+# Place the `.nef` in the target folder (this copies; the generated file stays where it was)
+nef_save_path = os.path.join(onnx_dir, os.path.basename(nef_model_path))
+shutil.copy(nef_model_path, nef_save_path)
+
+# Immediately verify that the `.nef` really exists at the destination
+if os.path.exists(nef_save_path):
+    print(f"\n✅ NEF file successfully saved to: {nef_save_path}")
+else:
+    raise RuntimeError(f"❌ Error: NEF file NOT found in {nef_save_path} after copying!")
+
+# Run `ls` to confirm the `.nef` is visible from within this Python process
+print("\n🔍 Listing files in target directory:")
+subprocess.run(["ls", "-lh", onnx_dir])
--- a/kneron/exporting/yolov5/init.py
+++ b/kneron/exporting/yolov5/init.py
--- a/kneron/exporting/yolov5/common.py
+++ b/kneron/exporting/yolov5/common.py
@ -0,0 +1,224 @@
+# This file contains modules common to various models
+import torch.nn as nn
+import torch
+import torch.nn.functional as F
+def autopad(k, p=None):  # kernel, padding
+    # Explicit padding wins; otherwise pad to 'same': half the kernel size,
+    # computed per dimension when the kernel is not a plain int.
+    if p is not None:
+        return p
+    if isinstance(k, int):
+        return k // 2
+    return [dim // 2 for dim in k]
+
+
+def DWConv(c1, c2, k=1, s=1, act=True):
+    # Depthwise convolution: a Conv block whose group count is gcd(c1, c2)
+    #   c1, c2: input / output channels
+    #   k, s:   kernel size and stride
+    #   act:    forwarded to Conv to enable or disable its activation
+    # math is not imported at file level in this module, so the original call
+    # raised NameError; import it locally to keep this function self-contained.
+    import math
+
+    return Conv(c1, c2, k, s, g=math.gcd(c1, c2), act=act)
+
+
+class Conv(nn.Module):
+    # Standard convolution block: Conv2d (no bias) -> BatchNorm2d -> LeakyReLU(0.1) or Identity
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super().__init__()
+        padding = autopad(k, p)
+        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)
+        self.bn = nn.BatchNorm2d(c2)
+        if act:
+            self.act = nn.LeakyReLU(0.1, inplace=True)
+        else:
+            self.act = nn.Identity()
+
+    def forward(self, x):
+        # conv -> batch norm -> activation
+        y = self.conv(x)
+        y = self.bn(y)
+        return self.act(y)
+
+    def fuseforward(self, x):
+        # Same as forward() but without self.bn
+        # NOTE(review): presumably used once BN has been folded into the conv weights - confirm with the caller
+        y = self.conv(x)
+        return self.act(y)
+
+
+class Bottleneck(nn.Module):
+    # Standard bottleneck: 1x1 Conv to the hidden width, 3x3 Conv to c2, optional identity shortcut
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+        super().__init__()
+        hidden = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
+        # the residual add is only enabled when input and output widths match
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        y = self.cv2(self.cv1(x))
+        if self.add:
+            return x + y
+        return y
+
+
+class BottleneckCSP(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    # Two branches from the same input (a Bottleneck stack and a plain 1x1 conv) are
+    # concatenated, batch-normalized, activated and fused by a final Conv.
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        hidden = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
+        self.cv4 = Conv(2 * hidden, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
+        self.m = nn.Sequential(*stack)
+
+    def forward(self, x):
+        deep = self.cv3(self.m(self.cv1(x)))  # bottleneck branch
+        skip = self.cv2(x)  # shortcut branch
+        merged = torch.cat((deep, skip), dim=1)
+        return self.cv4(self.act(self.bn(merged)))
+      
+class SPP(nn.Module):
+    # Spatial pyramid pooling layer used in YOLOv3-SPP
+    # The reduced feature map is concatenated with one stride-1 max-pooled copy per kernel size in k.
+    def __init__(self, c1, c2, k=(5, 9, 13)):
+        super().__init__()
+        hidden = c1 // 2  # hidden channels
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
+        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
+        self.m = nn.ModuleList(pools)
+
+    def forward(self, x):
+        x = self.cv1(x)
+        branches = [x]
+        for pool in self.m:
+            branches.append(pool(x))
+        return self.cv2(torch.cat(branches, 1))
+
+
+class Focus(nn.Module):#
+    # Focus wh information into c-space
+    # The space-to-depth step is expressed as one fixed stride-2 convolution instead of strided slicing:
+    # a one-hot 3x3 kernel copies exactly one pixel of each 2x2 cell into its own output channel, so the
+    # result equals cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1).
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super(Focus, self).__init__()
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+        # (row, col) position inside the kernel for each of the four slices, in concatenation order
+        taps = ((0, 0), (1, 0), (0, 1), (1, 1))
+        # The original hand-written kernels only covered c1 == 3; building them from c1 gives the
+        # identical (12, 3, 3, 3) tensor in that case and a valid kernel for any other channel count.
+        w_cat = torch.zeros(4 * c1, c1, 3, 3)
+        for tap, (row, col) in enumerate(taps):
+            for ch in range(c1):
+                w_cat[tap * c1 + ch, ch, row, col] = 1.
+        # kept as a plain attribute (not a parameter/buffer), so it never appears in state_dict
+        self.w_cat = w_cat
+        # zero-pad 2 on the right and 2 on the bottom so the 3x3 stride-2 kernel yields h/2 x w/2 outputs for even sizes
+        self.p2d = (0, 2, 0, 2)
+
+    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+        x_pad = F.pad(x, self.p2d, 'constant', 0)
+        # w_cat lives on the CPU; move it to the input's device on every call
+        xx = F.conv2d(x_pad, self.w_cat.to(x.device), stride=2)
+        return self.conv(xx)
+
+
+
+class Focus_ori(nn.Module):#
+    # Focus wh information into c-space (original slicing-based implementation)
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        # The original called super(Focus, self).__init__(); Focus_ori is not a subclass of Focus,
+        # so that raised TypeError on every construction.
+        super(Focus_ori, self).__init__()
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+
+    def forward(self, x):  # x(b,c,w,h) -> y(b,4c,w/2,h/2)
+        # NOTE(review): this cast forces a CUDA float tensor, so the module cannot run on a CPU-only host
+        x = x.type(torch.cuda.FloatTensor)
+        # gather the four pixels of every 2x2 cell into the channel dimension
+        return self.conv(torch.cat([x[..., ::2, ::2], x[..., 1::2, ::2], x[..., ::2, 1::2], x[..., 1::2, 1::2]], 1))
+
+class Concat(nn.Module):
+    # Joins a sequence of tensors along the dimension chosen at construction time
+    def __init__(self, dimension=1):
+        super().__init__()
+        self.d = dimension
+
+    def forward(self, x):
+        tensors, axis = x, self.d
+        return torch.cat(tensors, axis)
+
+
+class Flatten(nn.Module):
+    # Use after nn.AdaptiveAvgPool2d(1) to remove last 2 dimensions
+    # Keeps dimension 0 and collapses everything after it into a single dimension.
+    @staticmethod
+    def forward(x):
+        batch = x.size(0)
+        return x.view(batch, -1)
+
+
+class Classify(nn.Module):
+    # Classification head, i.e. x(b,c1,20,20) to x(b,c2)
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
+        super().__init__()
+        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
+        padding = autopad(k, p)
+        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)  # to x(b,c2,1,1)
+        self.flat = Flatten()
+
+    def forward(self, x):
+        # accept a single tensor or a list of tensors; pooled maps are concatenated on dim 1
+        feature_maps = x if isinstance(x, list) else [x]
+        pooled = [self.aap(fmap) for fmap in feature_maps]
+        z = torch.cat(pooled, 1)
+        return self.flat(self.conv(z))  # flatten to x(b,c2)
+
+class MixConv2d(nn.Module):
+    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
+    # One nn.Conv2d per kernel size in k; their outputs are concatenated on dim 1, batch-normalized,
+    # activated and added to the input. c2 output channels are split across the kernel sizes either
+    # evenly (equal_ch=True) or by solving for an equal weight count per group (equal_ch=False).
+    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
+        super(MixConv2d, self).__init__()
+        groups = len(k)
+        if equal_ch:  # equal c_ per group
+            i = torch.linspace(0, groups - 1E-6, c2).floor()  # c2 indices
+            c_ = [(i == g).sum() for g in range(groups)]  # intermediate channels
+        else:  # equal weight.numel() per group
+            # numpy is not imported at file level in this module, so this branch used to raise
+            # NameError; import it locally to keep the class self-contained.
+            import numpy as np
+
+            b = [c2] + [0] * groups
+            a = np.eye(groups + 1, groups, k=-1)
+            a -= np.roll(a, 1, axis=1)
+            a *= np.array(k) ** 2
+            a[0] = 1
+            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b
+
+        self.m = nn.ModuleList([nn.Conv2d(c1, int(c_[g]), k[g], s, k[g] // 2, bias=False) for g in range(groups)])
+        self.bn = nn.BatchNorm2d(c2)
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+
+    def forward(self, x):
+        # residual add: the concatenated branch output must be shape-compatible with x
+        return x + self.act(self.bn(torch.cat([m(x) for m in self.m], 1)))
+
+class CrossConv(nn.Module):
+    # Cross Convolution Downsample
+    # A (1, k) Conv followed by a (k, 1) Conv, with an optional identity shortcut.
+    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
+        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
+        super().__init__()
+        hidden = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
+        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
+        # the residual add is only enabled when input and output widths match
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        y = self.cv2(self.cv1(x))
+        if self.add:
+            return x + y
+        return y
+
+# class C3(nn.Module):
+#     # Cross Convolution CSP
+#     def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+#         super(C3, self).__init__()
+#         c_ = int(c2 * e)  # hidden channels
+#         self.cv1 = Conv(c1, c_, 1, 1)
+#         self.cv2 = nn.Conv2d(c1, c_, 1, 1, bias=False)
+#         self.cv3 = nn.Conv2d(c_, c_, 1, 1, bias=False)
+#         self.cv4 = Conv(2 * c_, c2, 1, 1)
+#         self.bn = nn.BatchNorm2d(2 * c_)  # applied to cat(cv2, cv3)
+#         self.act = nn.LeakyReLU(0.1, inplace=True)
+#         self.m = nn.Sequential(*[CrossConv(c_, c_, 3, 1, g, 1.0, shortcut) for _ in range(n)])
+
+#     def forward(self, x):
+#         y1 = self.cv3(self.m(self.cv1(x)))
+#         y2 = self.cv2(x)
+#         return self.cv4(self.act(self.bn(torch.cat((y1, y2), dim=1))))
+
+class C3(nn.Module):
+    # CSP Bottleneck with 3 convolutions
+    # cv1 feeds a Bottleneck stack, cv2 is the parallel shortcut, cv3 fuses the concatenation.
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super().__init__()
+        hidden = int(c2 * e)  # hidden channels
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = Conv(c1, hidden, 1, 1)
+        self.cv3 = Conv(2 * hidden, c2, 1)  # act=FReLU(c2)
+        stack = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
+        self.m = nn.Sequential(*stack)
+
+    def forward(self, x):
+        deep = self.m(self.cv1(x))
+        skip = self.cv2(x)
+        return self.cv3(torch.cat((deep, skip), dim=1))
--- a/kneron/exporting/yolov5/kneron_preprocessing/API.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/API.py
@ -0,0 +1,684 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import os
+from .funcs.utils import str2int, str2bool
+from . import Flow
+
+# Three module-level Flow pipelines, one per numerical type. They are shared by every
+# function in this module, so settings applied by one call stay on the instance afterwards.
+flow = Flow()
+flow.set_numerical_type('floating')
+flow_520 = Flow()
+flow_520.set_numerical_type('520')
+flow_720 = Flow()
+flow_720.set_numerical_type('720')
+
+# Sentinel used as a keyword default meaning "look the value up in `default` below"
+DEFAULT = None
+# Fallback values for arguments left at DEFAULT; rewritten in place by
+# set_default_as_520() / set_default_as_floating().
+default = {
+    'crop':{
+        'align_w_to_4':False
+        },
+    'resize':{
+        'type':'bilinear',
+        'calculate_ratio_using_CSim':False
+        }
+}
+
+def set_default_as_520():
+    """
+    Switch the module-level fallback parameters to the 520 setting
+
+    crop.align_w_to_4 = True
+    resize.type = 'fixed_520'
+    resize.calculate_ratio_using_CSim = True
+    """
+    # `default` is only mutated in place, never rebound, so no `global` statement is needed
+    default['crop'].update(align_w_to_4=True)
+    default['resize'].update(type='fixed_520', calculate_ratio_using_CSim=True)
+
+def set_default_as_floating():
+    """
+    Switch the module-level fallback parameters to the floating setting
+
+    crop.align_w_to_4 = False
+    resize.type = 'bilinear'
+    resize.calculate_ratio_using_CSim = False
+    """
+    # `default` is only mutated in place, never rebound, so no `global` statement is needed
+    default['crop'].update(align_w_to_4=False)
+    default['resize'].update(type='bilinear', calculate_ratio_using_CSim=False)
+
+def print_info_on():
+    """
+    Turn information printing on for every module-level flow.
+
+    The original toggled only the floating and 520 flows; the 720 flow is
+    included here so inproc_720 follows the same switch.
+    """
+    for pipeline in (flow, flow_520, flow_720):
+        pipeline.set_print_info(True)
+
+def print_info_off():
+    """
+    Turn information printing off for every module-level flow.
+
+    The original toggled only the floating and 520 flows; the 720 flow is
+    included here so inproc_720 follows the same switch.
+    """
+    for pipeline in (flow, flow_520, flow_720):
+        pipeline.set_print_info(False)
+
+def load_image(image):
+    """
+    load_image function
+    Load a (non-raw) image through the floating flow.
+
+    Args:
+        image: [np.array/str], an array or an image file path
+
+    Returns:
+        out: [np.array], rgb888 format
+    """
+    return flow.load_image(image, is_raw = False)
+
+def load_bin(image, fmt=None, size=None):
+    """
+    load_bin function
+    Load a raw bin file and convert it to an rgb888 np.array.
+
+    Args:
+        image: [str], bin file path
+        fmt: [str], "rgb888" / "rgb565" / "nir"
+        size: [tuple], (image_w, image_h)
+
+    Returns:
+        out: [np.array], rgb888 format
+
+    Raises:
+        AssertionError: when size is not a tuple or fmt is not a str
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.load_bin(image,'rgb565',(raw_w,raw_h))
+    """
+    assert isinstance(size, tuple)
+    assert isinstance(fmt, str)
+
+    width, height = size[0], size[1]
+    raw = flow.load_image(image, is_raw = True, raw_img_type='bin', raw_img_fmt = fmt, img_in_width = width, img_in_height = height)
+    # convert from the file's own format to rgb888 using the flow's color stage
+    flow.set_color_conversion(source_format=fmt, out_format = 'rgb888')
+    converted, _ = flow.funcs['color'](raw)
+    return converted
+
+def load_hex(file, fmt=None, size=None):
+    """
+    load_hex function
+    Load a raw hex file and convert it to an rgb888 np.array.
+
+    Args:
+        file: [str], hex file path
+        fmt: [str], "rgb888" / "yuv444" / "ycbcr444" / "yuv422" / "ycbcr422" / "rgb565" (case-insensitive)
+        size: [tuple], (image_w, image_h)
+
+    Returns:
+        out: [np.array], rgb888 format
+
+    Raises:
+        AssertionError: when size is not a tuple, fmt is not a str, or fmt is not a supported format
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.load_hex(image,'rgb565',(raw_w,raw_h))
+    """
+    supported = ['rgb888',"yuv444" , "ycbcr444" , "yuv422" , "ycbcr422" , "rgb565"]
+    assert isinstance(size, tuple)
+    assert isinstance(fmt, str)
+    assert (fmt.lower() in supported)
+
+    width, height = size[0], size[1]
+    raw = flow.load_image(file, is_raw = True, raw_img_type='hex', raw_img_fmt = fmt, img_in_width = width, img_in_height = height)
+    # convert from the file's own format to rgb888 using the flow's color stage
+    flow.set_color_conversion(source_format=fmt, out_format = 'rgb888')
+    converted, _ = flow.funcs['color'](raw)
+    return converted
+
+def dump_image(image, output=None, file_fmt='txt',image_fmt='rgb888',order=0):
+    """
+    dump_image function
+
+    Dump an image as txt, bin or hex (default txt).
+    Image format is one of: RGB888, RGBA8888, RGB565, NIR, YUV444, YCbCr444, YUV422, YCbCr422 (default RGB888).
+
+    Args:
+        image: [np.array/str], an array or an image file path (a path is loaded with load_image first)
+        output: [str], dump file path; when None nothing is written
+        file_fmt: [str], "bin" / "txt" / "hex", dump file format, default is txt
+        image_fmt: [str], RGB888 / RGBA8888 / RGB565 / NIR / YUV444 / YCbCr444 / YUV422 / YCbCr422, default is RGB888
+        order: unused by this function
+
+    Raises:
+        AssertionError: when the (loaded) image is not an np.ndarray
+
+    Examples:
+        >>> kneron_preprocessing.API.dump_image(image_data,out_path,file_fmt='bin')
+    """
+    pixels = load_image(image) if isinstance(image, str) else image
+    assert isinstance(pixels, np.ndarray)
+
+    if output is not None:
+        flow.set_output_setting(is_dump=False, dump_format=file_fmt, image_format=image_fmt ,output_file=output)
+        flow.dump_image(pixels)
+
+def convert(image, out_fmt = 'RGB888', source_fmt = 'RGB888'):
+    """
+    color convert
+
+    Runs the floating flow's color stage with simulation disabled.
+
+    Args:
+        image: [np.array], input
+        out_fmt: [str], "rgb888" / "rgba8888" / "rgb565" / "yuv" / "ycbcr" / "yuv422" / "ycbcr422"
+        source_fmt: [str], "rgb888" / "rgba8888" / "rgb565" / "yuv" / "ycbcr" / "yuv422" / "ycbcr422"
+
+    Returns:
+        out: [np.array]
+    """
+    flow.set_color_conversion(source_format = source_fmt, out_format=out_fmt, simulation=False)
+    color_stage = flow.funcs['color']
+    converted, _ = color_stage(image)
+    return converted
+
+def get_crop_range(box,align_w_to_4=DEFAULT, pad_square_to_4=False,rounding_type=0):
+    """
+    Compute the exact crop box produced by the given settings, without cropping a real image
+
+    Args:
+        box: [tuple], (x1, y1, x2, y2); None yields (0, 0, 0, 0)
+        align_w_to_4: [bool], crop length in w direction align to 4 or not; when left at DEFAULT
+            the module-level default['crop']['align_w_to_4'] is used
+        pad_square_to_4: [bool], pad to square(align 4) or not, default False
+        rounding_type: [int], 0-> x1,y1 take floor, x2,y2 take ceil; 1->all take rounding
+
+    Returns:
+        out: [tuple,4], (crop_x1, crop_y1, crop_x2, crop_y2)
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.get_crop_range((272,145,461,341), align_w_to_4=True, pad_square_to_4=True)
+        (272, 145, 460, 341)
+    """
+    if box is None:
+        return (0,0,0,0)
+
+    use_align = default['crop']['align_w_to_4'] if align_w_to_4 is None else align_w_to_4
+    x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
+    flow.set_crop(type='specific', start_x=x1,start_y=y1,end_x=x2,end_y=y2, align_w_to_4=use_align, pad_square_to_4=pad_square_to_4,rounding_type=rounding_type)
+
+    # run the crop stage on a 1x1 placeholder; only the reported box is of interest
+    placeholder = np.zeros((1,1,3)).astype('uint8')
+    _, info = flow.funcs['crop'](placeholder)
+    return info['box']
+
+def crop(image, box=None, align_w_to_4=DEFAULT, pad_square_to_4=False,rounding_type=0 ,info_out=None):
+    """
+    crop function
+
+    specific crop range by box
+
+    Args:
+        image: [np.array], input
+        box: [tuple], (x1, y1, x2, y2); when None the image is returned unchanged
+        align_w_to_4: [bool], crop length in w direction align to 4 or not; when left at DEFAULT
+            the module-level default['crop']['align_w_to_4'] is used
+        pad_square_to_4: [bool], pad to square(align 4) or not, default False
+        rounding_type: [int], 0-> x1,y1 take floor, x2,y2 take ceil; 1->all take rounding
+        info_out: [dict], optional; when given, the final crop box is stored in info_out['box']
+
+    Returns:
+        out: [np.array]
+
+    Raises:
+        AssertionError: when image is not an np.ndarray
+
+    Examples:
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop(image_data,(272,145,461,341), align_w_to_4=True, info_out=info)
+        >>> info['box']
+        (272, 145, 460, 341)
+
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop(image_data,(272,145,461,341), pad_square_to_4=True, info_out=info)
+        >>> info['box']
+        (268, 145, 464, 341)
+    """
+    assert isinstance(image, np.ndarray)
+    if box is None:
+        return image
+    if align_w_to_4 is None:
+        align_w_to_4 = default['crop']['align_w_to_4']
+
+    flow.set_crop(type='specific', start_x=box[0],start_y=box[1],end_x=box[2],end_y=box[3], align_w_to_4=align_w_to_4, pad_square_to_4=pad_square_to_4,rounding_type=rounding_type)
+    image,info = flow.funcs['crop'](image)
+
+    # The default used to be a shared `{}` that every call without info_out wrote into;
+    # only report back when the caller actually supplied a dict.
+    if info_out is not None:
+        info_out['box'] = info['box']
+    return image
+
+def crop_center(image, range=None, align_w_to_4=DEFAULT, pad_square_to_4=False,rounding_type=0 ,info_out=None):
+    """
+    crop function
+
+    center crop by range
+
+    Args:
+        image: [np.array], input
+        range: [tuple], (crop_w, crop_h); when None the image is returned unchanged
+        align_w_to_4: [bool], crop length in w direction align to 4 or not; when left at DEFAULT
+            the module-level default['crop']['align_w_to_4'] is used
+        pad_square_to_4: [bool], pad to square(align 4) or not, default False
+        rounding_type: [int], 0-> x1,y1 take floor, x2,y2 take ceil; 1->all take rounding
+        info_out: [dict], optional; when given, the final crop box is stored in info_out['box']
+
+    Returns:
+        out: [np.array]
+
+    Raises:
+        AssertionError: when image is not an np.ndarray
+
+    Examples:
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop_center(image_data,(102,40), align_w_to_4=True,info_out=info)
+        >>> info['box']
+        (268, 220, 372, 260)
+
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop_center(image_data,(102,40), pad_square_to_4=True, info_out=info)
+        >>> info['box']
+        (269, 192, 371, 294)
+    """
+    assert isinstance(image, np.ndarray)
+    if range is None:
+        return image
+    if align_w_to_4 is None:
+        align_w_to_4 = default['crop']['align_w_to_4']
+
+    flow.set_crop(type='center', crop_w=range[0],crop_h=range[1], align_w_to_4=align_w_to_4, pad_square_to_4=pad_square_to_4,rounding_type=rounding_type)
+    image,info = flow.funcs['crop'](image)
+
+    # The default used to be a shared `{}` that every call without info_out wrote into;
+    # only report back when the caller actually supplied a dict.
+    if info_out is not None:
+        info_out['box'] = info['box']
+    return image
+
+def crop_corner(image, range=None, align_w_to_4=DEFAULT,pad_square_to_4=False,rounding_type=0 ,info_out=None):
+    """
+    crop function
+
+    corner crop by range
+
+    Args:
+        image: [np.array], input
+        range: [tuple], (crop_w, crop_h); when None the image is returned unchanged
+        align_w_to_4: [bool], crop length in w direction align to 4 or not; when left at DEFAULT
+            the module-level default['crop']['align_w_to_4'] is used
+        pad_square_to_4: [bool], pad to square(align 4) or not, default False
+        rounding_type: [int], 0-> x1,y1 take floor, x2,y2 take ceil; 1->all take rounding
+        info_out: [dict], optional; when given, the final crop box is stored in info_out['box']
+
+    Returns:
+        out: [np.array]
+
+    Raises:
+        AssertionError: when image is not an np.ndarray
+
+    Examples:
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop_corner(image_data,(102,40), align_w_to_4=True,info_out=info)
+        >>> info['box']
+        (0, 0, 104, 40)
+
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop_corner(image_data,(102,40), pad_square_to_4=True,info_out=info)
+        >>> info['box']
+        (0, -28, 102, 74)
+    """
+    assert isinstance(image, np.ndarray)
+    if range is None:
+        return image
+    if align_w_to_4 is None:
+        align_w_to_4 = default['crop']['align_w_to_4']
+
+    # rounding_type was accepted but never forwarded here, unlike crop() and crop_center()
+    flow.set_crop(type='corner', crop_w=range[0],crop_h=range[1], align_w_to_4=align_w_to_4, pad_square_to_4=pad_square_to_4,rounding_type=rounding_type)
+    image, info = flow.funcs['crop'](image)
+
+    # The default used to be a shared `{}` that every call without info_out wrote into;
+    # only report back when the caller actually supplied a dict.
+    if info_out is not None:
+        info_out['box'] = info['box']
+    return image
+
+def resize(image, size=None, keep_ratio = True, zoom = True, type=DEFAULT, calculate_ratio_using_CSim = DEFAULT, info_out=None):
+    """
+    resize function
+
+    resize type can be bilinear or bilicubic as floating type, fixed or fixed_520/fixed_720 as fixed type.
+    Per the original author's note, the fixed_520/fixed_720 types add behavior that simulates a 520/720 bug.
+
+    Args:
+        image: [np.array], input
+        size: [tuple], (input_w, input_h); when None the image is returned unchanged
+        keep_ratio: [bool], keep_ratio or not, default True
+        zoom: [bool], enable resize can zoom image or not, default True
+        type: [str], "bilinear" / "bilicubic" / "cv2" / "fixed" / "fixed_520" / "fixed_720"; when left at
+            DEFAULT the module-level default['resize']['type'] is used
+        calculate_ratio_using_CSim: [bool], calculate the ratio and scale using Csim function and C float; when
+            left at DEFAULT the module-level default['resize']['calculate_ratio_using_CSim'] is used
+        info_out: [dict], optional; when given, the final scale size (w,h) is stored in info_out['size']
+
+    Returns:
+        out: [np.array]
+
+    Raises:
+        AssertionError: when image is not an np.ndarray
+
+    Examples:
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.resize(image_data,size=(56,56),type='fixed',info_out=info)
+        >>> info['size']
+        (54,56)
+    """
+    assert isinstance(image, np.ndarray)
+    if size is None:
+        return image
+    if type is None:
+        type = default['resize']['type']
+    if calculate_ratio_using_CSim is None:
+        calculate_ratio_using_CSim = default['resize']['calculate_ratio_using_CSim']
+
+    flow.set_resize(resize_w = size[0], resize_h = size[1], type=type, keep_ratio=keep_ratio,zoom=zoom, calculate_ratio_using_CSim=calculate_ratio_using_CSim)
+    image, info = flow.funcs['resize'](image)
+
+    # The default used to be a shared `{}` that every call without info_out wrote into;
+    # only report back when the caller actually supplied a dict.
+    if info_out is not None:
+        info_out['size'] = info['size']
+
+    return image
+
+def pad(image, pad_l=0, pad_r=0, pad_t=0, pad_b=0, pad_val=0):
+    """
+    pad function
+
+    Pad by explicit left, right, top and bottom sizes.
+
+    Args:
+        image: [np.array], input
+        pad_l: [int], pad size from left, default 0
+        pad_r: [int], pad size from right, default 0
+        pad_t: [int], pad size from top, default 0
+        pad_b: [int], pad size from bottom, default 0
+        pad_val: [float], the value of pad, default 0
+
+    Returns:
+        out: [np.array]
+
+    Raises:
+        AssertionError: when image is not an np.ndarray
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.pad(image_data,20,40,20,40,-0.5)
+    """
+    assert isinstance(image, np.ndarray)
+
+    flow.set_padding(type='specific',pad_l=pad_l,pad_r=pad_r,pad_t=pad_t,pad_b=pad_b,pad_val=pad_val)
+    padding_stage = flow.funcs['padding']
+    padded, _ = padding_stage(image)
+    return padded
+
+def pad_center(image,size=None, pad_val=0):
+    """
+    pad function
+
+    Center pad up to the given size.
+
+    Args:
+        image: [np.array], input
+        size: [tuple], (padded_size_w, padded_size_h); when None the image is returned unchanged
+        pad_val: [float], the value of pad, default 0
+
+    Returns:
+        out: [np.array]
+
+    Raises:
+        AssertionError: when image is not an np.ndarray, or is larger than size in either dimension
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.pad_center(image_data,size=(56,56),pad_val=-0.5)
+    """
+    assert isinstance(image, np.ndarray)
+    if size is None:
+        return image
+
+    target_w, target_h = size[0], size[1]
+    # image.shape is indexed as (rows, cols), i.e. height first
+    assert ( (image.shape[0] <= target_h) & (image.shape[1] <= target_w) )
+
+    flow.set_padding(type='center',padded_w=target_w,padded_h=target_h,pad_val=pad_val)
+    padded, _ = flow.funcs['padding'](image)
+    return padded
+
+def pad_corner(image,size=None, pad_val=0):
+    """
+    pad function
+
+    Corner pad up to the given size.
+
+    Args:
+        image: [np.array], input
+        size: [tuple], (padded_size_w, padded_size_h); when None the image is returned unchanged
+        pad_val: [float], the value of pad, default 0
+
+    Returns:
+        out: [np.array]
+
+    Raises:
+        AssertionError: when image is not an np.ndarray, or is larger than size in either dimension
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.pad_corner(image_data,size=(56,56),pad_val=-0.5)
+    """
+    assert isinstance(image, np.ndarray)
+    if size is None:
+        return image
+
+    target_w, target_h = size[0], size[1]
+    # image.shape is indexed as (rows, cols), i.e. height first
+    assert ( (image.shape[0] <= target_h) & (image.shape[1] <= target_w) )
+
+    flow.set_padding(type='corner',padded_w=target_w,padded_h=target_h,pad_val=pad_val)
+    padded, _ = flow.funcs['padding'](image)
+    return padded
+
+def norm(image,scale=256.,bias=-0.5, mean=None, std=None):
+    """
+    norm function
+
+    Formula as stated by the original documentation:
+        x = (x/scale - bias)
+        x[0,1,2] = x - mean[0,1,2]
+        x[0,1,2] = x / std[0,1,2]
+    NOTE(review): the arithmetic lives in the flow's normalize stage, which is not visible here -
+    confirm the sign applied to bias.
+
+    Args:
+        image: [np.array], input
+        scale: [float], default = 256
+        bias: [float], default = -0.5
+        mean: [tuple,3], default = None
+        std: [tuple,3], default = None
+
+    Returns:
+        out: [np.array]
+
+    Raises:
+        AssertionError: when image is not an np.ndarray
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.norm(image_data)
+        >>> image_data = kneron_preprocessing.API.norm(image_data,mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    """
+    assert isinstance(image, np.ndarray)
+
+    flow.set_normalize(type='specific',scale=scale,  bias=bias, mean=mean, std =std)
+    normalize_stage = flow.funcs['normalize']
+    normalized, _ = normalize_stage(image)
+    return normalized
+
+def inproc_520(image,raw_fmt='rgb565',raw_size=None,npu_size=None, crop_box=None, pad_mode=0, norm='kneron', gray=False, rotate=0, radix=8, bit_width=8, round_w_to_16=True, NUM_BANK_LINE=32,BANK_ENTRY_CNT=512,MAX_IMG_PREPROC_ROW_NUM=511,MAX_IMG_PREPROC_COL_NUM=256):
+    """
+    inproc_520
+
+    Configure the shared 520 flow (crop, color, resize, pad, normalize, 520 settings) and run the
+    whole process on one image.
+
+    Args:
+        image: [np.array], input; anything that is not an np.ndarray is handed to the flow as a raw 'bin' image
+        raw_fmt: [str], raw image format, only used when image is not an np.ndarray, default = 'rgb565'
+        raw_size: [tuple], (raw_w, raw_h), required when image is not an np.ndarray
+        npu_size: [tuple], (model_w, model_h); when None the input is returned unchanged
+        crop_box: [tuple], (x1, y1, x2, y2), if None will skip crop
+        pad_mode: [int], 0: pad 2 sides, 1: pad 1 side, 2: no pad. default = 0
+        norm: [str], default = 'kneron'
+        gray: [bool], default = False
+        rotate: [int], 0 / 1 / 2 ,default = 0
+        radix: [int], default = 8
+        bit_width: [int], default = 8
+        round_w_to_16: [bool], default = True
+        NUM_BANK_LINE, BANK_ENTRY_CNT, MAX_IMG_PREPROC_ROW_NUM, MAX_IMG_PREPROC_COL_NUM: [int],
+            forwarded unchanged to flow_520.set_520_setting
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.inproc_520(image_data,npu_size=(56,56),crop_box=(272,145,460,341),pad_mode=1)
+    """
+    if (not isinstance(image, np.ndarray)):
+        flow_520.set_raw_img(is_raw_img='yes',raw_img_type = 'bin',raw_img_fmt=raw_fmt, img_in_width=raw_size[0], img_in_height=raw_size[1])
+    else:
+        flow_520.set_raw_img(is_raw_img='no')
+        flow_520.set_color_conversion(source_format='rgb888')
+
+    if npu_size is None:
+        return image
+
+    flow_520.set_model_size(w=npu_size[0],h=npu_size[1])
+
+    ## Crop
+    # identity test: `crop_box != None` compares element-wise (and then fails) for ndarray boxes
+    if crop_box is not None:
+        flow_520.set_crop(start_x=crop_box[0],start_y=crop_box[1],end_x=crop_box[2],end_y=crop_box[3])
+        crop_fisrt = True
+    else:
+        crop_fisrt = False
+
+    ## Color
+    if gray:
+        flow_520.set_color_conversion(out_format='l',simulation='no')
+    else:
+        flow_520.set_color_conversion(out_format='rgb888',simulation='no')
+
+    ## Resize & Pad
+    pad_mode = str2int(pad_mode)
+    if (pad_mode == 0):
+        pad_type = 'center'
+        resize_keep_ratio = 'yes'
+    elif (pad_mode == 1):
+        pad_type = 'corner'
+        resize_keep_ratio = 'yes'
+    else:
+        pad_type = 'center'
+        resize_keep_ratio = 'no'
+
+    flow_520.set_resize(keep_ratio=resize_keep_ratio)
+    flow_520.set_padding(type=pad_type)
+
+    ## Norm
+    flow_520.set_normalize(type=norm)
+
+    ## 520 inproc
+    # `crop_fisrt` (sic) is the keyword spelling used by set_520_setting
+    flow_520.set_520_setting(radix=radix,bit_width=bit_width,rotate=rotate,crop_fisrt=crop_fisrt,round_w_to_16=round_w_to_16,NUM_BANK_LINE=NUM_BANK_LINE,BANK_ENTRY_CNT=BANK_ENTRY_CNT,MAX_IMG_PREPROC_ROW_NUM=MAX_IMG_PREPROC_ROW_NUM,MAX_IMG_PREPROC_COL_NUM=MAX_IMG_PREPROC_COL_NUM)
+    image_data, _ = flow_520.run_whole_process(image)
+
+    return image_data
+
+def inproc_720(image,raw_fmt='rgb565',raw_size=None,npu_size=None, crop_box=None, pad_mode=0, norm='kneron', gray=False):
+    """
+    inproc_720
+
+    Configure the shared 720 flow (crop, color, resize, pad) and run the whole process on one image.
+
+    Args:
+        image: [np.array], input; anything that is not an np.ndarray is handed to the flow as a raw 'bin' image
+        raw_fmt: [str], raw image format, only used when image is not an np.ndarray, default = 'rgb565'
+        raw_size: [tuple], (raw_w, raw_h), required when image is not an np.ndarray
+        npu_size: [tuple], (model_w, model_h); when None the input is returned unchanged
+        crop_box: [tuple], (x1, y1, x2, y2), if None will skip crop
+        pad_mode: [int], 0: pad 2 sides, 1: pad 1 side, 2: no pad. default = 0
+        norm: [str], default = 'kneron'. NOTE(review): accepted but never applied - unlike inproc_520,
+            no set_normalize call is made here; confirm whether that is intended
+        gray: [bool], default = False
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.inproc_720(image_data,npu_size=(56,56),crop_box=(272,145,460,341),pad_mode=1)
+    """
+    if (not isinstance(image, np.ndarray)):
+        flow_720.set_raw_img(is_raw_img='yes',raw_img_type = 'bin',raw_img_fmt=raw_fmt, img_in_width=raw_size[0], img_in_height=raw_size[1])
+    else:
+        flow_720.set_raw_img(is_raw_img='no')
+        flow_720.set_color_conversion(source_format='rgb888')
+
+    if npu_size is None:
+        return image
+
+    flow_720.set_model_size(w=npu_size[0],h=npu_size[1])
+
+    ## Crop
+    # identity test: `crop_box != None` compares element-wise (and then fails) for ndarray boxes
+    if crop_box is not None:
+        flow_720.set_crop(start_x=crop_box[0],start_y=crop_box[1],end_x=crop_box[2],end_y=crop_box[3])
+        crop_fisrt = True  # only consumed by the disabled set_720_setting call below
+    else:
+        crop_fisrt = False
+
+    ## Color
+    if gray:
+        flow_720.set_color_conversion(out_format='l',simulation='no')
+    else:
+        flow_720.set_color_conversion(out_format='rgb888',simulation='no')
+
+    ## Resize & Pad
+    pad_mode = str2int(pad_mode)
+    if (pad_mode == 0):
+        pad_type = 'center'
+        resize_keep_ratio = 'yes'
+    elif (pad_mode == 1):
+        pad_type = 'corner'
+        resize_keep_ratio = 'yes'
+    else:
+        pad_type = 'center'
+        resize_keep_ratio = 'no'
+
+    flow_720.set_resize(keep_ratio=resize_keep_ratio)
+    flow_720.set_padding(type=pad_type)
+
+    ## 720 inproc
+    # flow_720.set_720_setting(radix=radix,bit_width=bit_width,rotate=rotate,crop_fisrt=crop_fisrt,round_w_to_16=round_w_to_16,NUM_BANK_LINE=NUM_BANK_LINE,BANK_ENTRY_CNT=BANK_ENTRY_CNT,MAX_IMG_PREPROC_ROW_NUM=MAX_IMG_PREPROC_ROW_NUM,MAX_IMG_PREPROC_COL_NUM=MAX_IMG_PREPROC_COL_NUM)
+    image_data, _ = flow_720.run_whole_process(image)
+
+    return image_data
+
+def bit_match(data1, data2):
+    """
+    bit_match function
+
+    Check whether data1 is element-wise equal to data2 after both are flattened.
+
+    Args:
+        data1: [np.array / str], can be array or txt/bin file ('.bin' is read as uint8, '.txt' via np.loadtxt)
+        data2: [np.array / str], can be array or txt/bin file ('.bin' is read as uint8, '.txt' via np.loadtxt)
+
+    Returns:
+        out1: [bool], is match or not
+        out2: [np.array], flat positions of the mismatched data when the lengths agree but values differ,
+              otherwise np.zeros((1))
+
+    Raises:
+        AssertionError: when an input is neither an np.ndarray nor a path ending in '.bin' / '.txt'
+
+    Examples:
+        >>> result, mismatched = kneron_preprocessing.API.bit_match(data1,data2)
+    """
+    flattened = []
+    for data in (data1, data2):
+        if isinstance(data, str):
+            extension = os.path.splitext(data)[1]
+            if extension == '.bin':
+                data = np.fromfile(data, dtype='uint8')
+            elif extension == '.txt':
+                data = np.loadtxt(data)
+        assert isinstance(data, np.ndarray)
+        flattened.append(data.reshape((-1,1)))
+    data1, data2 = flattened
+
+    if len(data1) != len(data2):
+        print('error len')
+        return False, np.zeros((1))
+
+    # Compare for inequality directly. The original tested `data2 - data1 > 0`, which
+    # silently passed every position where data2 was smaller than data1 for signed or float data.
+    mismatched = np.where(data1 != data2)[0]
+    if len(mismatched) > 0:
+        print('error', mismatched)
+        return False, mismatched
+
+    print('pass')
+    return True, np.zeros((1))
+
+def cpr_to_crp(x_start, x_end, y_start, y_end, pad_l, pad_r, pad_t, pad_b, rx_start, rx_end, ry_start, ry_end):
+    """
+    calculate the parameters of crop->pad->resize flow  to HW crop->resize->padding flow
+
+    Each pad is rescaled by (resized extent) / (cropped extent + both pads of that axis), and the
+    resize window is then shrunk by the rescaled pads.
+
+    Args:
+        x_start, x_end, y_start, y_end: crop window, returned unchanged
+        pad_l, pad_r, pad_t, pad_b: pad sizes applied after the crop, in source pixels
+        rx_start, rx_end, ry_start, ry_end: resize window covering the padded crop
+
+    Returns:
+        out: [tuple,12], (x_start, x_end, y_start, y_end, pad_l, pad_r, pad_t, pad_b,
+             rx_start, rx_end, ry_start, ry_end) with the pads and resize window converted
+
+    Raises:
+        ZeroDivisionError: when the padded crop has zero width or height
+    """
+    # Capture the padded source extents once, before any pad is overwritten. The original recomputed
+    # the denominator after pad_l / pad_t had already been rescaled, so the two pads of an axis
+    # were scaled by different factors.
+    padded_w = x_end - x_start + pad_l + pad_r
+    padded_h = y_end - y_start + pad_t + pad_b
+    resized_w = rx_end - rx_start
+    resized_h = ry_end - ry_start
+
+    pad_l = round(pad_l * resized_w / padded_w)
+    pad_r = round(pad_r * resized_w / padded_w)
+    pad_t = round(pad_t * resized_h / padded_h)
+    pad_b = round(pad_b * resized_h / padded_h)
+
+    rx_start +=pad_l
+    rx_end -=pad_r
+    ry_start +=pad_t
+    ry_end -=pad_b
+
+    return x_start, x_end, y_start, y_end, pad_l, pad_r, pad_t, pad_b, rx_start, rx_end, ry_start, ry_end
--- a/kneron/exporting/yolov5/kneron_preprocessing/Cflow.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/Cflow.py
@ -0,0 +1,172 @@
+import numpy as np
+import argparse
+import kneron_preprocessing
+
+def main_(args):
+    image = args.input_file
+    filefmt = args.file_fmt
+    if filefmt == 'bin':
+        raw_format = args.raw_format
+        raw_w = args.input_width
+        raw_h = args.input_height
+
+        image_data = kneron_preprocessing.API.load_bin(image,raw_format,(raw_w,raw_h))
+    else:
+        image_data = kneron_preprocessing.API.load_image(image)
+
+
+    npu_w = args.width
+    npu_h = args.height
+
+    crop_first = True if args.crop_first == "True" else False
+    if crop_first:
+        x1 = args.x_pos
+        y1 = args.y_pos
+        x2 = args.crop_w + x1
+        y2 = args.crop_h + y1
+        crop_box = [x1,y1,x2,y2]
+    else:
+        crop_box = None
+
+    pad_mode = args.pad_mode
+    norm_mode = args.norm_mode
+    bitwidth = args.bitwidth
+    radix = args.radix
+    rotate = args.rotate_mode
+
+    ##
+    image_data = kneron_preprocessing.API.inproc_520(image_data,npu_size=(npu_w,npu_h),crop_box=crop_box,pad_mode=pad_mode,norm=norm_mode,rotate=rotate,radix=radix,bit_width=bitwidth)
+
+    output_file = args.output_file
+    kneron_preprocessing.API.dump_image(image_data,output_file,'bin','rgba')
+
+    return
+
+
+if __name__ == "__main__":
+    argparser = argparse.ArgumentParser(
+        description="preprocessing"
+        )
+
+    argparser.add_argument(
+        '-i',
+        '--input_file',
+        help="input file name"
+        )
+
+    argparser.add_argument(
+        '-ff',
+        '--file_fmt',
+        help="input file format, jpg or bin"
+        )
+
+    argparser.add_argument(
+        '-rf',
+        '--raw_format',
+        help="input file image format, rgb or rgb565 or nir"
+        )
+
+    argparser.add_argument(
+        '-i_w',
+        '--input_width',
+        type=int,
+        help="input image width"
+        )
+
+    argparser.add_argument(
+        '-i_h',
+        '--input_height',
+        type=int,
+        help="input image height"
+        )
+
+    argparser.add_argument(
+        '-o',
+        '--output_file',
+        help="output file name"
+        )
+
+    argparser.add_argument(
+        '-s_w',
+        '--width',
+        type=int,
+        help="output width for npu input",
+        )
+
+    argparser.add_argument(
+        '-s_h',
+        '--height',
+        type=int,
+        help="output height for npu input",
+        )
+
+    argparser.add_argument(
+        '-c_f',
+        '--crop_first',
+        help="crop first True or False",
+        )
+
+    argparser.add_argument(
+        '-x',
+        '--x_pos',
+        type=int,
+        help="left up coordinate x",
+        )
+
+    argparser.add_argument(
+        '-y',
+        '--y_pos',
+        type=int,
+        help="left up coordinate y",
+        )
+
+    argparser.add_argument(
+        '-c_w',
+        '--crop_w',
+        type=int,
+        help="crop width",
+        )
+
+    argparser.add_argument(
+        '-c_h',
+        '--crop_h',
+        type=int,
+        help="crop height",
+        )
+
+    argparser.add_argument(
+        '-p_m',
+        '--pad_mode',
+        type=int,
+        help=" 0: pad 2 sides, 1: pad 1 side, 2: no pad.",
+        )
+
+    argparser.add_argument(
+        '-n_m',
+        '--norm_mode',
+        help="normalizaton mode: yolo, kneron, tf."
+        )
+
+    argparser.add_argument(
+        '-r_m',
+        '--rotate_mode',
+        type=int,
+        help="rotate mode:0,1,2"
+        )
+
+    argparser.add_argument(
+        '-bw',
+        '--bitwidth',
+        type=int,
+        help="Int for bitwidth"
+        )
+    
+    argparser.add_argument(
+        '-r',
+        '--radix',
+        type=int,
+        help="Int for radix"
+        )
+
+    args = argparser.parse_args()
+    main_(args)
--- a/kneron/exporting/yolov5/kneron_preprocessing/Flow.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/Flow.py
--- a/kneron/exporting/yolov5/kneron_preprocessing/init.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/init.py
@ -0,0 +1,2 @@
+from .Flow import *
+from .API import *
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/ColorConversion.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/ColorConversion.py
@ -0,0 +1,285 @@
+import numpy as np
+from PIL import Image
+from .utils import signed_rounding, clip, str2bool
+
+# Number of fractional bits of the fixed-point coefficients below; the
+# fixed-point converters in this file round and then shift right by this amount.
+format_bit = 10
+# Fixed-point YUV -> RGB coefficients. Naming: cXY weights input channel Y
+# (0 = Y, 1 = U, 2 = V) for output channel X (0 = R, 1 = G, 2 = B).
+# The luma coefficients are 1 because the converter multiplies the luma
+# sample by 1024 before applying them.
+c00_yuv = 1
+c02_yuv = 1436
+c10_yuv = 1
+c11_yuv = -354
+c12_yuv = -732
+c20_yuv = 1
+c21_yuv = 1814
+# Fixed-point YCbCr -> RGB coefficients, same naming scheme. The visible
+# converter only references c00 for luma (applied to Y - 16) and reuses that
+# term for all three outputs; c10/c20 are not referenced there.
+c00_ycbcr = 1192
+c02_ycbcr = 1634
+c10_ycbcr = 1192
+c11_ycbcr = -401
+c12_ycbcr = -833
+c20_ycbcr = 1192
+c21_ycbcr = 2065
+
+# Floating-point conversion matrices. They are applied as `pixels @ Matrix`,
+# so each COLUMN holds the weights that produce one output channel.
+Matrix_ycbcr_to_rgb888 = np.array(
+    [[1.16438356e+00,  1.16438356e+00,  1.16438356e+00],
+     [2.99747219e-07, - 3.91762529e-01,  2.01723263e+00],
+     [1.59602686e+00, - 8.12968294e-01,  3.04059479e-06]])
+
+# RGB888 -> YCbCr weights; the converter adds the 16 / 128 / 128 offsets
+# separately after the multiplication.
+Matrix_rgb888_to_ycbcr = np.array(
+    [[0.25678824, - 0.14822353,  0.43921569],
+     [0.50412941, - 0.29099216, - 0.36778824],
+     [0.09790588,  0.43921569, - 0.07142745]])
+
+# RGB888 -> YUV weights (no offsets are added by the converter).
+Matrix_rgb888_to_yuv = np.array(
+    [[ 0.29899106, -0.16877996,  0.49988381],
+    [ 0.5865453,  -0.33110385, -0.41826072],
+    [ 0.11446364,  0.49988381, -0.08162309]])
+
+# Matrix_rgb888_to_yuv = np.array(
+#     [[0.299, - 0.147,   0.615],
+#      [0.587, - 0.289, - 0.515],
+#      [0.114,   0.436, - 0.100]])
+
+# Matrix_yuv_to_rgb888 = np.array(
+#     [[1.000,   1.000,  1.000],
+#      [0.000, - 0.394,  2.032],
+#      [1.140, - 0.581,  0.000]])
+
+class runner(object):
+    def __init__(self):
+        self.set = {
+            'print_info':'no',
+            'model_size':[0,0],
+            'numerical_type':'floating',
+            "source_format": "rgb888",
+            "out_format": "rgb888",
+            "options": {
+                "simulation": "no",
+                "simulation_format": "rgb888"
+            }
+        }
+
+    def update(self, **kwargs):
+        #
+        self.set.update(kwargs)
+
+        ## simulation
+        self.funs = []
+        if str2bool(self.set['options']['simulation']) and self.set['source_format'].lower() in ['RGB888', 'rgb888', 'RGB', 'rgb']:
+            if self.set['options']['simulation_format'].lower() in ['YUV422', 'yuv422', 'YUV', 'yuv']:
+                self.funs.append(self._ColorConversion_RGB888_to_YUV422)
+                self.set['source_format'] = 'YUV422'
+            elif self.set['options']['simulation_format'].lower() in ['YCBCR422', 'YCbCr422', 'ycbcr422', 'YCBCR', 'YCbCr', 'ycbcr']:
+                self.funs.append(self._ColorConversion_RGB888_to_YCbCr422)
+                self.set['source_format'] = 'YCbCr422'
+            elif self.set['options']['simulation_format'].lower() in['RGB565', 'rgb565']:
+                self.funs.append(self._ColorConversion_RGB888_to_RGB565)
+                self.set['source_format'] = 'RGB565'
+        
+        ## to rgb888
+        if self.set['source_format'].lower() in ['YUV444', 'yuv444','YUV422', 'yuv422', 'YUV', 'yuv']:
+            self.funs.append(self._ColorConversion_YUV_to_RGB888)
+        elif self.set['source_format'].lower() in ['YCBCR444', 'YCbCr444', 'ycbcr444','YCBCR422', 'YCbCr422', 'ycbcr422', 'YCBCR', 'YCbCr', 'ycbcr']:
+            self.funs.append(self._ColorConversion_YCbCr_to_RGB888)
+        elif self.set['source_format'].lower() in ['RGB565', 'rgb565']:
+            self.funs.append(self._ColorConversion_RGB565_to_RGB888)
+        elif self.set['source_format'].lower() in ['l', 'L' , 'nir', 'NIR']:
+            self.funs.append(self._ColorConversion_L_to_RGB888)
+        elif self.set['source_format'].lower() in ['RGBA8888', 'rgba8888' , 'RGBA', 'rgba']:
+            self.funs.append(self._ColorConversion_RGBA8888_to_RGB888)
+
+        ## output format
+        if self.set['out_format'].lower() in ['L', 'l']:
+            self.funs.append(self._ColorConversion_RGB888_to_L)
+        elif self.set['out_format'].lower() in['RGB565', 'rgb565']:
+            self.funs.append(self._ColorConversion_RGB888_to_RGB565)
+        elif self.set['out_format'].lower() in['RGBA', 'RGBA8888','rgba','rgba8888']:
+            self.funs.append(self._ColorConversion_RGB888_to_RGBA8888)
+        elif self.set['out_format'].lower() in['YUV', 'YUV444','yuv','yuv444']:
+            self.funs.append(self._ColorConversion_RGB888_to_YUV444)
+        elif self.set['out_format'].lower() in['YUV422','yuv422']:
+            self.funs.append(self._ColorConversion_RGB888_to_YUV422)
+        elif self.set['out_format'].lower() in['YCBCR', 'YCBCR444','YCbCr','YCbCr444','ycbcr','ycbcr444']:
+            self.funs.append(self._ColorConversion_RGB888_to_YCbCr444)
+        elif self.set['out_format'].lower() in['YCBCR422','YCbCr422','ycbcr422']:
+            self.funs.append(self._ColorConversion_RGB888_to_YCbCr422)
+
+    def print_info(self):
+        print("<colorConversion>",
+              "source_format:", self.set['source_format'],
+              ', out_format:', self.set['out_format'],
+              ', simulation:', self.set['options']['simulation'],
+              ', simulation_format:', self.set['options']['simulation_format'])
+
+    def run(self, image_data):
+        assert isinstance(image_data, np.ndarray)
+        # print info
+        if str2bool(self.set['print_info']):
+            self.print_info()
+
+        # color
+        for _, f in enumerate(self.funs):
+            image_data = f(image_data)
+
+        # output
+        info = {}
+        return image_data, info
+
+    def _ColorConversion_RGB888_to_YUV444(self, image):
+        ## floating
+        image = image.astype('float')
+        image = (image @ Matrix_rgb888_to_yuv + 0.5).astype('uint8')
+        return image
+
+    def _ColorConversion_RGB888_to_YUV422(self, image):
+        # rgb888 to yuv444
+        image = self._ColorConversion_RGB888_to_YUV444(image)
+
+        # yuv444 to yuv422
+        u2 = image[:, 0::2, 1]
+        u4 = np.repeat(u2, 2, axis=1)
+        v2 = image[:, 1::2, 2]
+        v4 = np.repeat(v2, 2, axis=1)
+        image[..., 1] = u4
+        image[..., 2] = v4
+        return image
+           
+    def _ColorConversion_YUV_to_RGB888(self, image):
+        ## fixed
+        h, w, c = image.shape
+        image_f = image.reshape((h * w, c))
+        image_rgb_f = np.zeros(image_f.shape, dtype=np.uint8)
+
+        for i in range(h * w):
+            image_y = image_f[i, 0] *1024
+            if image_f[i, 1] > 127:
+                image_u = -((~(image_f[i, 1] - 1)) & 0xFF)
+            else:
+                image_u = image_f[i, 1]
+            if image_f[i, 2] > 127:
+                image_v = -((~(image_f[i, 2] - 1)) & 0xFF)
+            else:
+                image_v = image_f[i, 2]
+
+            image_r = c00_yuv * image_y + c02_yuv * image_v
+            image_g = c10_yuv * image_y + c11_yuv * image_u + c12_yuv * image_v
+            image_b = c20_yuv * image_y + c21_yuv * image_u
+
+            image_r = signed_rounding(image_r, format_bit)
+            image_g = signed_rounding(image_g, format_bit)
+            image_b = signed_rounding(image_b, format_bit)
+
+            image_r = image_r >> format_bit
+            image_g = image_g >> format_bit
+            image_b = image_b >> format_bit
+
+            image_rgb_f[i, 0] = clip(image_r, 0, 255)
+            image_rgb_f[i, 1] = clip(image_g, 0, 255)
+            image_rgb_f[i, 2] = clip(image_b, 0, 255)
+
+        image_rgb = image_rgb_f.reshape((h, w, c))
+        return image_rgb
+
+    def _ColorConversion_RGB888_to_YCbCr444(self, image):
+        ## floating
+        image = image.astype('float')
+        image = (image @ Matrix_rgb888_to_ycbcr + 0.5).astype('uint8')
+        image[:, :, 0] += 16
+        image[:, :, 1] += 128
+        image[:, :, 2] += 128
+
+        return image
+
+    def _ColorConversion_RGB888_to_YCbCr422(self, image):
+        # rgb888 to ycbcr444
+        image = self._ColorConversion_RGB888_to_YCbCr444(image)
+
+        # ycbcr444 to ycbcr422
+        cb2 = image[:, 0::2, 1]
+        cb4 = np.repeat(cb2, 2, axis=1)
+        cr2 = image[:, 1::2, 2]
+        cr4 = np.repeat(cr2, 2, axis=1)
+        image[..., 1] = cb4
+        image[..., 2] = cr4
+        return image
+
+    def _ColorConversion_YCbCr_to_RGB888(self, image):
+        ## floating
+        if (self.set['numerical_type'] == 'floating'):
+            image = image.astype('float')
+            image[:, :, 0] -= 16
+            image[:, :, 1] -= 128
+            image[:, :, 2] -= 128
+            image = ((image @ Matrix_ycbcr_to_rgb888) + 0.5).astype('uint8')
+            return image
+
+        ## fixed
+        h, w, c = image.shape
+        image_f = image.reshape((h * w, c))
+        image_rgb_f = np.zeros(image_f.shape, dtype=np.uint8)
+
+        for i in range(h * w):
+            image_y = (image_f[i, 0] - 16) * c00_ycbcr
+            image_cb = image_f[i, 1] - 128
+            image_cr = image_f[i, 2] - 128
+
+            image_r = image_y + c02_ycbcr * image_cr
+            image_g = image_y + c11_ycbcr * image_cb + c12_ycbcr * image_cr
+            image_b = image_y + c21_ycbcr * image_cb
+
+            image_r = signed_rounding(image_r, format_bit)
+            image_g = signed_rounding(image_g, format_bit)
+            image_b = signed_rounding(image_b, format_bit)
+
+            image_r = image_r >> format_bit
+            image_g = image_g >> format_bit
+            image_b = image_b >> format_bit
+
+            image_rgb_f[i, 0] = clip(image_r, 0, 255)
+            image_rgb_f[i, 1] = clip(image_g, 0, 255)
+            image_rgb_f[i, 2] = clip(image_b, 0, 255)
+
+        image_rgb = image_rgb_f.reshape((h, w, c))
+        return image_rgb
+
+    def _ColorConversion_RGB888_to_RGB565(self, image):
+        assert (len(image.shape)==3)
+        assert (image.shape[2]>=3)
+        
+        image_rgb565 = np.zeros(image.shape, dtype=np.uint8)
+        image_rgb = image.astype('uint8')
+        image_rgb565[:, :, 0] = image_rgb[:, :, 0] >> 3
+        image_rgb565[:, :, 1] = image_rgb[:, :, 1] >> 2
+        image_rgb565[:, :, 2] = image_rgb[:, :, 2] >> 3
+        return image_rgb565
+
+    def _ColorConversion_RGB565_to_RGB888(self, image):
+        assert (len(image.shape)==3)
+        assert (image.shape[2]==3)
+
+        image_rgb = np.zeros(image.shape, dtype=np.uint8)
+        image_rgb[:, :, 0] = image[:, :, 0] << 3
+        image_rgb[:, :, 1] = image[:, :, 1] << 2
+        image_rgb[:, :, 2] = image[:, :, 2] << 3
+        return image_rgb
+
+    def _ColorConversion_L_to_RGB888(self, image):
+        image_L = image.astype('uint8')
+        img = Image.fromarray(image_L).convert('RGB')
+        image_data = np.array(img).astype('uint8')
+        return image_data
+
+    def _ColorConversion_RGB888_to_L(self, image):
+        image_rgb = image.astype('uint8')
+        img = Image.fromarray(image_rgb).convert('L')
+        image_data = np.array(img).astype('uint8')
+        return image_data
+
+    def _ColorConversion_RGBA8888_to_RGB888(self, image):
+        assert (len(image.shape)==3)
+        assert (image.shape[2]==4)
+        return image[:,:,:3]
+
+    def _ColorConversion_RGB888_to_RGBA8888(self, image):
+        assert (len(image.shape)==3)
+        assert (image.shape[2]==3)
+        imageA = np.concatenate((image, np.zeros((image.shape[0], image.shape[1], 1), dtype=np.uint8) ), axis=2)
+        return imageA
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/Crop.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/Crop.py
@ -0,0 +1,145 @@
+import numpy as np
+from PIL import Image
+from .utils import str2int, str2float, str2bool, pad_square_to_4
+from .utils_520 import round_up_n
+from .Runner_base import Runner_base, Param_base
+
+class General(Param_base):
+    type = 'center'
+    align_w_to_4 = False
+    pad_square_to_4 = False
+    rounding_type = 0
+    crop_w = 0
+    crop_h = 0
+    start_x = 0.
+    start_y = 0.
+    end_x = 0.
+    end_y = 0.
+    def update(self, **dic):
+        self.type = dic['type']
+        self.align_w_to_4 = str2bool(dic['align_w_to_4'])
+        self.rounding_type = str2int(dic['rounding_type'])
+        self.crop_w = str2int(dic['crop_w'])
+        self.crop_h = str2int(dic['crop_h'])
+        self.start_x = str2float(dic['start_x'])
+        self.start_y = str2float(dic['start_y'])
+        self.end_x = str2float(dic['end_x'])
+        self.end_y = str2float(dic['end_y'])
+
+    def __str__(self):
+        str_out = [
+            ', type:',str(self.type),
+            ', align_w_to_4:',str(self.align_w_to_4),
+            ', pad_square_to_4:',str(self.pad_square_to_4),
+            ', crop_w:',str(self.crop_w),
+            ', crop_h:',str(self.crop_h),
+            ', start_x:',str(self.start_x),
+            ', start_y:',str(self.start_y),
+            ', end_x:',str(self.end_x),
+            ', end_y:',str(self.end_y)]
+        return(' '.join(str_out))
+       
+class runner(Runner_base):
+    ## overwrite the class in Runner_base
+    general = General()
+
+    def __str__(self):
+        return('<Crop>')
+
+    def update(self, **kwargs):
+        ##
+        super().update(**kwargs)
+
+        ##
+        if (self.general.start_x != self.general.end_x) and (self.general.start_y != self.general.end_y):
+            self.general.type = 'specific'
+        elif(self.general.type != 'specific'):
+            if self.general.crop_w == 0 or self.general.crop_h == 0:
+                self.general.crop_w = self.common.model_size[0]
+                self.general.crop_h = self.common.model_size[1]
+            assert(self.general.crop_w > 0)
+            assert(self.general.crop_h > 0)
+            assert(self.general.type.lower() in ['CENTER', 'Center', 'center', 'CORNER', 'Corner', 'corner'])
+        else:
+            assert(self.general.type == 'specific')
+
+    def run(self, image_data):
+        ## init
+        img = Image.fromarray(image_data)
+        w, h = img.size
+
+        ## get range
+        if self.general.type.lower() in ['CENTER', 'Center', 'center']:
+            x1, y1, x2, y2 = self._calcuate_xy_center(w, h)
+        elif self.general.type.lower() in ['CORNER', 'Corner', 'corner']:
+            x1, y1, x2, y2 = self._calcuate_xy_corner(w, h)
+        else:
+            x1 = self.general.start_x
+            y1 = self.general.start_y
+            x2 = self.general.end_x
+            y2 = self.general.end_y
+            assert( ((x1 != x2) and (y1 != y2)) )
+
+        ## rounding
+        if self.general.rounding_type == 0:
+            x1 = int(np.floor(x1))
+            y1 = int(np.floor(y1))
+            x2 = int(np.ceil(x2))
+            y2 = int(np.ceil(y2))
+        else:
+            x1 = int(round(x1))
+            y1 = int(round(y1))
+            x2 = int(round(x2))
+            y2 = int(round(y2))
+
+        if self.general.align_w_to_4:
+            # x1 = (x1+1) &(~3)  #//+2
+            # x2 = (x2+2) &(~3)  #//+1
+            x1 = (x1+3) &(~3)  #//+2
+            left = w - x2
+            left = (left+3) &(~3)
+            x2 = w - left
+
+        ## pad_square_to_4
+        if str2bool(self.general.pad_square_to_4):
+            x1,x2,y1,y2 = pad_square_to_4(x1,x2,y1,y2)
+
+        # do crop
+        box = (x1,y1,x2,y2)
+        img = img.crop(box)
+
+        # print info
+        if str2bool(self.common.print_info):
+            self.general.start_x = x1
+            self.general.start_y = y1
+            self.general.end_x = x2
+            self.general.end_y = y2
+            self.general.crop_w = x2 - x1
+            self.general.crop_h = y2 - y1
+            self.print_info()
+
+        # output
+        image_data = np.array(img)
+        info = {}
+        info['box'] = box
+
+        return image_data, info
+
+
+    ## protect fun
+    def _calcuate_xy_center(self, w, h):
+        x1 = w/2 - self.general.crop_w / 2
+        y1 = h/2 - self.general.crop_h / 2
+        x2 = w/2 + self.general.crop_w / 2
+        y2 = h/2 + self.general.crop_h / 2
+        return x1, y1, x2, y2
+
+    def _calcuate_xy_corner(self, _1, _2):
+        x1 = 0
+        y1 = 0
+        x2 = self.general.crop_w
+        y2 = self.general.crop_h
+        return x1, y1, x2, y2
+
+    def do_crop(self, image_data, startW, startH, endW, endH):
+        return image_data[startH:endH, startW:endW, :]
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/Normalize.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/Normalize.py
@ -0,0 +1,186 @@
+import numpy as np
+from .utils import str2bool, str2int, str2float, clip_ary
+
+class runner(object):
+    def __init__(self):
+        self.set = {
+            'general': {
+                'print_info':'no',
+                'model_size':[0,0],
+                'numerical_type':'floating',
+                'type': 'kneron'
+            },
+            'floating':{
+                "scale": 1,
+                "bias": 0,
+                "mean": "",
+                "std": "",
+            },
+            'hw':{
+                "radix":8,
+                "shift":"",
+                "sub":""
+            }
+        }
+        return
+
+    def update(self, **kwargs):
+        #
+        self.set.update(kwargs)
+
+        #
+        if self.set['general']['numerical_type'] == '520':
+            if self.set['general']['type'].lower() in ['TF', 'Tf', 'tf']:
+                self.fun_normalize = self._chen_520
+                self.shift = 7 - self.set['hw']['radix']
+                self.sub = 128
+            elif self.set['general']['type'].lower() in ['YOLO', 'Yolo', 'yolo']:
+                self.fun_normalize = self._chen_520
+                self.shift = 8 - self.set['hw']['radix']
+                self.sub = 0
+            elif self.set['general']['type'].lower() in ['KNERON', 'Kneron', 'kneron']:
+                self.fun_normalize = self._chen_520
+                self.shift = 8 - self.set['hw']['radix']
+                self.sub = 128
+            else:
+                self.fun_normalize = self._chen_520
+                self.shift = 0
+                self.sub = 0      
+        elif self.set['general']['numerical_type'] == '720':
+                self.fun_normalize = self._chen_720
+                self.shift = 0
+                self.sub = 0                   
+        else:
+            if self.set['general']['type'].lower() in ['TORCH', 'Torch', 'torch']:
+                self.fun_normalize = self._normalize_torch
+                self.set['floating']['scale'] = 255.
+                self.set['floating']['mean'] = [0.485, 0.456, 0.406]
+                self.set['floating']['std'] = [0.229, 0.224, 0.225]
+            elif self.set['general']['type'].lower() in ['TF', 'Tf', 'tf']:
+                self.fun_normalize = self._normalize_tf
+                self.set['floating']['scale'] = 127.5
+                self.set['floating']['bias'] = -1.
+            elif self.set['general']['type'].lower() in ['CAFFE', 'Caffe', 'caffe']:
+                self.fun_normalize = self._normalize_caffe
+                self.set['floating']['mean'] = [103.939, 116.779, 123.68]
+            elif self.set['general']['type'].lower() in ['YOLO', 'Yolo', 'yolo']:
+                self.fun_normalize = self._normalize_yolo
+                self.set['floating']['scale'] = 255.
+            elif self.set['general']['type'].lower() in ['KNERON', 'Kneron', 'kneron']:
+                self.fun_normalize = self._normalize_kneron
+                self.set['floating']['scale'] = 256.
+                self.set['floating']['bias'] = -0.5
+            else:
+                self.fun_normalize = self._normalize_customized
+                self.set['floating']['scale'] = str2float(self.set['floating']['scale'])
+                self.set['floating']['bias'] = str2float(self.set['floating']['bias'])
+                if self.set['floating']['mean'] != None:
+                    if len(self.set['floating']['mean']) != 3:
+                        self.set['floating']['mean'] = None
+                if self.set['floating']['std'] != None:
+                    if len(self.set['floating']['std']) != 3:
+                        self.set['floating']['std'] = None
+
+
+    def print_info(self):
+        if self.set['general']['numerical_type'] == '520':
+            print("<normalize>",
+            'numerical_type', self.set['general']['numerical_type'],
+            ", type:", self.set['general']['type'],
+            ', shift:',self.shift, 
+            ', sub:', self.sub)
+        else:
+            print("<normalize>",
+            'numerical_type', self.set['general']['numerical_type'],
+            ", type:", self.set['general']['type'],
+            ', scale:',self.set['floating']['scale'], 
+            ', bias:', self.set['floating']['bias'],
+            ', mean:', self.set['floating']['mean'],
+            ', std:',self.set['floating']['std'])
+
+    def run(self, image_data):
+        # print info
+        if str2bool(self.set['general']['print_info']):
+            self.print_info()
+
+        # norm
+        image_data = self.fun_normalize(image_data)
+
+        # output
+        info = {}
+        return image_data, info
+
+    def _normalize_torch(self, x):
+        if len(x.shape) != 3:
+            return x
+        x = x.astype('float')
+        x = x / self.set['floating']['scale']
+        x[..., 0] -= self.set['floating']['mean'][0]
+        x[..., 1] -= self.set['floating']['mean'][1]
+        x[..., 2] -= self.set['floating']['mean'][2]
+        x[..., 0] /= self.set['floating']['std'][0]
+        x[..., 1] /= self.set['floating']['std'][1]
+        x[..., 2] /= self.set['floating']['std'][2]
+        return x
+
+    def _normalize_tf(self, x):
+        # print('_normalize_tf')
+        x = x.astype('float')
+        x = x / self.set['floating']['scale']
+        x = x + self.set['floating']['bias']
+        return x
+
+    def _normalize_caffe(self, x):
+        if len(x.shape) != 3:
+            return x
+        x = x.astype('float')
+        x = x[..., ::-1]
+        x[..., 0] -= self.set['floating']['mean'][0]
+        x[..., 1] -= self.set['floating']['mean'][1]
+        x[..., 2] -= self.set['floating']['mean'][2]
+        return x
+
+    def _normalize_yolo(self, x):
+        # print('_normalize_yolo')
+        x = x.astype('float')
+        x = x / self.set['floating']['scale']
+        return x
+
+    def _normalize_kneron(self, x):
+        # print('_normalize_kneron')
+        x = x.astype('float')
+        x = x/self.set['floating']['scale']
+        x = x + self.set['floating']['bias']
+        return x
+
+    def _normalize_customized(self, x):
+        # print('_normalize_customized')
+        x = x.astype('float')
+        if  self.set['floating']['scale'] != 0:
+            x = x/ self.set['floating']['scale'] 
+        x = x + self.set['floating']['bias'] 
+        if self.set['floating']['mean'] is not None:
+            x[..., 0] -= self.set['floating']['mean'][0]
+            x[..., 1] -= self.set['floating']['mean'][1]
+            x[..., 2] -= self.set['floating']['mean'][2]
+        if self.set['floating']['std'] is not None:
+            x[..., 0] /= self.set['floating']['std'][0]
+            x[..., 1] /= self.set['floating']['std'][1]
+            x[..., 2] /= self.set['floating']['std'][2]
+
+        return x
+
+    def _chen_520(self, x):
+        # print('_chen_520')
+        x = (x - self.sub).astype('uint8')
+        x = (np.right_shift(x,self.shift))
+        x=x.astype('uint8')
+        return x
+
+    def _chen_720(self, x):
+        # print('_chen_720')
+        if self.shift == 1:
+            x = x + np.array([[self.sub], [self.sub], [self.sub]])
+        else:
+            x = x + np.array([[self.sub], [self.sub], [self.sub]])
+        return x
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/Padding.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/Padding.py
@ -0,0 +1,187 @@
+import numpy as np
+from PIL import Image
+from .utils import str2bool, str2int, str2float
+from .Runner_base import Runner_base, Param_base
+
+class General(Param_base):
+    type = ''
+    pad_val = ''
+    padded_w = ''
+    padded_h = ''
+    pad_l = ''
+    pad_r = ''
+    pad_t = ''
+    pad_b = ''
+    padding_ch = 3
+    padding_ch_type = 'RGB'
+    def update(self, **dic):
+        self.type = dic['type']
+        self.pad_val = dic['pad_val']
+        self.padded_w = str2int(dic['padded_w'])
+        self.padded_h = str2int(dic['padded_h'])
+        self.pad_l = str2int(dic['pad_l'])
+        self.pad_r = str2int(dic['pad_r'])
+        self.pad_t = str2int(dic['pad_t'])
+        self.pad_b = str2int(dic['pad_b'])
+
+    def __str__(self):
+        str_out = [
+            ', type:',str(self.type),
+            ', pad_val:',str(self.pad_val),
+            ', pad_l:',str(self.pad_l),
+            ', pad_r:',str(self.pad_r),
+            ', pad_r:',str(self.pad_t),
+            ', pad_b:',str(self.pad_b),
+            ', padding_ch:',str(self.padding_ch)]
+        return(' '.join(str_out))
+
+class Hw(Param_base):
+    radix = 8
+    normalize_type = 'floating'
+    def update(self, **dic):
+        self.radix = dic['radix']
+        self.normalize_type = dic['normalize_type']
+
+    def __str__(self):
+        str_out = [
+            ', radix:', str(self.radix),
+            ', normalize_type:',str(self.normalize_type)]
+        return(' '.join(str_out))
+
+
+class runner(Runner_base):
+    ## overwrite the class in Runner_base
+    general = General()
+    hw = Hw()
+
+    def __str__(self):
+        return('<Padding>')
+
+    def update(self, **kwargs):
+        super().update(**kwargs)
+
+        ## update pad type & pad length
+        if (self.general.pad_l != 0) or (self.general.pad_r != 0) or (self.general.pad_t != 0) or (self.general.pad_b != 0):
+            self.general.type = 'specific'
+            assert(self.general.pad_l >= 0)
+            assert(self.general.pad_r >= 0)
+            assert(self.general.pad_t >= 0)
+            assert(self.general.pad_b >= 0)
+        elif(self.general.type != 'specific'):
+            if self.general.padded_w == 0 or self.general.padded_h == 0:
+                self.general.padded_w = self.common.model_size[0]
+                self.general.padded_h = self.common.model_size[1]
+            assert(self.general.padded_w > 0)
+            assert(self.general.padded_h > 0)
+            assert(self.general.type.lower() in ['CENTER', 'Center', 'center', 'CORNER', 'Corner', 'corner'])
+        else:
+            assert(self.general.type == 'specific')
+            
+        ## decide pad_val & padding ch
+        # if numerical_type is floating
+        if (self.common.numerical_type == 'floating'):
+            if self.general.pad_val != 'edge':
+                self.general.pad_val = str2float(self.general.pad_val)
+            self.general.padding_ch = 3
+            self.general.padding_ch_type = 'RGB'
+        # if numerical_type is 520 or 720
+        else: 
+            if self.general.pad_val == '':
+                if self.hw.normalize_type.lower() in ['TF', 'Tf', 'tf']:
+                    self.general.pad_val = np.uint8(-128 >> (7 - self.hw.radix))
+                elif self.hw.normalize_type.lower() in ['YOLO', 'Yolo', 'yolo']:
+                    self.general.pad_val = np.uint8(0 >> (8 - self.hw.radix))
+                elif self.hw.normalize_type.lower() in ['KNERON', 'Kneron', 'kneron']:
+                    self.general.pad_val = np.uint8(-128 >> (8 - self.hw.radix))
+                else:
+                    self.general.pad_val = np.uint8(0 >> (8 - self.hw.radix))
+            else:
+                self.general.pad_val = str2int(self.general.pad_val)
+            self.general.padding_ch = 4
+            self.general.padding_ch_type = 'RGBA'
+
+    def run(self, image_data):
+        # init
+        shape = image_data.shape
+        w = shape[1]
+        h = shape[0]
+        if len(shape) < 3:
+            self.general.padding_ch = 1
+            self.general.padding_ch_type = 'L'
+        else:
+            if shape[2] == 3 and self.general.padding_ch == 4:
+                image_data = np.concatenate((image_data, np.zeros((h, w, 1), dtype=np.uint8) ), axis=2)
+                
+        ## padding
+        if self.general.type.lower() in ['CENTER',  'Center',  'center']:
+            img_pad = self._padding_center(image_data, w, h)
+        elif self.general.type.lower() in ['CORNER',  'Corner',  'corner']:
+            img_pad = self._padding_corner(image_data, w, h)
+        else:
+            img_pad = self._padding_sp(image_data, w, h)
+
+        # print info
+        if str2bool(self.common.print_info):
+            self.print_info()
+
+        # output
+        info = {}
+        return img_pad, info
+
+    ## protect fun
+    def _padding_center(self, img, ori_w, ori_h):
+        # img_pad = Image.new(self.general.padding_ch_type, (self.general.padded_w, self.general.padded_h), int(self.general.pad_val[0]))
+        # img = Image.fromarray(img)
+        # img_pad.paste(img, ((self.general.padded_w-ori_w)//2, (self.general.padded_h-ori_h)//2))
+        # return img_pad
+        padH = self.general.padded_h - ori_h
+        padW = self.general.padded_w - ori_w
+        self.general.pad_t = padH // 2
+        self.general.pad_b = (padH // 2) + (padH % 2)
+        self.general.pad_l = padW // 2
+        self.general.pad_r = (padW // 2) + (padW % 2)
+        if self.general.pad_l < 0 or self.general.pad_r <0 or self.general.pad_t <0 or self.general.pad_b<0:
+            return img
+        img_pad = self._padding_sp(img,ori_w,ori_h)
+        return img_pad
+
+    def _padding_corner(self, img, ori_w, ori_h):
+        # img_pad = Image.new(self.general.padding_ch_type, (self.general.padded_w, self.general.padded_h), self.general.pad_val)
+        # img_pad.paste(img, (0, 0))
+        self.general.pad_l = 0
+        self.general.pad_r = self.general.padded_w - ori_w
+        self.general.pad_t = 0
+        self.general.pad_b = self.general.padded_h - ori_h
+        if self.general.pad_l < 0 or self.general.pad_r <0 or self.general.pad_t <0 or self.general.pad_b<0:
+            return img
+        img_pad = self._padding_sp(img,ori_w,ori_h)
+        return img_pad
+
+    def _padding_sp(self, img, ori_w, ori_h):
+        # block_t = np.zeros((self.general.pad_t, self.general.pad_l + self.general.pad_r + ori_w, self.general.padding_ch), dtype=np.float)
+        # block_l = np.zeros((ori_h, self.general.pad_l, self.general.padding_ch), dtype=np.float)
+        # block_r = np.zeros((ori_h, self.general.pad_r, self.general.padding_ch), dtype=np.float)
+        # block_b = np.zeros((self.general.pad_b, self.general.pad_l + self.general.pad_r + ori_w, self.general.padding_ch), dtype=np.float)
+        # for i in range(self.general.padding_ch):
+        #     block_t[:, :, i] = np.ones(block_t[:, :, i].shape, dtype=np.float) * self.general.pad_val
+        #     block_l[:, :, i] = np.ones(block_l[:, :, i].shape, dtype=np.float) * self.general.pad_val
+        #     block_r[:, :, i] = np.ones(block_r[:, :, i].shape, dtype=np.float) * self.general.pad_val
+        #     block_b[:, :, i] = np.ones(block_b[:, :, i].shape, dtype=np.float) * self.general.pad_val
+        # padded_image_hor = np.concatenate((block_l, img, block_r), axis=1)
+        # padded_image = np.concatenate((block_t, padded_image_hor, block_b), axis=0)
+        # return padded_image
+        if self.general.padding_ch == 1:
+            pad_range = ( (self.general.pad_t, self.general.pad_b),(self.general.pad_l, self.general.pad_r) )
+        else:
+            pad_range = ((self.general.pad_t, self.general.pad_b),(self.general.pad_l, self.general.pad_r),(0,0))
+
+        if isinstance(self.general.pad_val, str):
+            if self.general.pad_val == 'edge':
+                padded_image = np.pad(img, pad_range, mode="edge")
+            else:
+                padded_image = np.pad(img, pad_range, mode="constant",constant_values=0)
+        else:
+            padded_image = np.pad(img, pad_range, mode="constant",constant_values=self.general.pad_val)
+        
+        return padded_image
+
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/Resize.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/Resize.py
@ -0,0 +1,237 @@
+import numpy as np
+import cv2
+from PIL import Image
+from .utils import str2bool, str2int
+from ctypes import c_float
+from .Runner_base import Runner_base, Param_base
+
+class General(Param_base):
+    type = 'bilinear'
+    keep_ratio = True
+    zoom = True
+    calculate_ratio_using_CSim = True
+    resize_w = 0
+    resize_h = 0
+    resized_w = 0
+    resized_h = 0
+    def update(self, **dic):
+        self.type = dic['type']
+        self.keep_ratio = str2bool(dic['keep_ratio'])
+        self.zoom = str2bool(dic['zoom'])
+        self.calculate_ratio_using_CSim = str2bool(dic['calculate_ratio_using_CSim'])
+        self.resize_w = str2int(dic['resize_w'])
+        self.resize_h = str2int(dic['resize_h'])
+
+    def __str__(self):
+        str_out = [
+            ', type:',str(self.type),
+            ', keep_ratio:',str(self.keep_ratio),
+            ', zoom:',str(self.zoom),
+            ', calculate_ratio_using_CSim:',str(self.calculate_ratio_using_CSim),
+            ', resize_w:',str(self.resize_w),
+            ', resize_h:',str(self.resize_h),
+            ', resized_w:',str(self.resized_w),
+            ', resized_h:',str(self.resized_h)]
+        return(' '.join(str_out))
+
+class Hw(Param_base):
+    resize_bit = 12
+    def update(self, **dic):
+        pass
+
+    def __str__(self):
+        str_out = [
+            ', resize_bit:',str(self.resize_bit)]
+        return(' '.join(str_out))
+
+class runner(Runner_base):
+    ## overwrite the class in Runner_base
+    general = General()
+    hw = Hw()
+
+    def __str__(self):
+        return('<Resize>')
+
+    def update(self, **kwargs):
+        super().update(**kwargs)
+        
+        ## if resize size has not been assigned, then it will take model size as resize size
+        if self.general.resize_w == 0 or self.general.resize_h == 0:
+            self.general.resize_w = self.common.model_size[0]
+            self.general.resize_h = self.common.model_size[1]
+        assert(self.general.resize_w > 0)
+        assert(self.general.resize_h > 0)
+
+        ##
+        if self.common.numerical_type == '520':
+            self.general.type = 'fixed_520'
+        elif self.common.numerical_type == '720':
+            self.general.type = 'fixed_720'
+        assert(self.general.type.lower() in ['BILINEAR',  'Bilinear',  'bilinear', 'BICUBIC',  'Bicubic',  'bicubic', 'FIXED',  'Fixed', 'fixed', 'FIXED_520',  'Fixed_520',  'fixed_520', 'FIXED_720', 'Fixed_720', 'fixed_720','CV', 'cv', 'opencv', 'OpenCV', 'CV2', 'cv2'])
+
+
+    def run(self, image_data):
+        ## init
+        ori_w = image_data.shape[1]
+        ori_h = image_data.shape[0]
+        info = {}
+
+        ##
+        if self.general.keep_ratio:
+            self.general.resized_w, self.general.resized_h = self.calcuate_scale_keep_ratio(self.general.resize_w,self.general.resize_h, ori_w, ori_h, self.general.calculate_ratio_using_CSim)
+        else:
+            self.general.resized_w = int(self.general.resize_w)
+            self.general.resized_h = int(self.general.resize_h)
+        assert(self.general.resized_w > 0)
+        assert(self.general.resized_h > 0)
+
+        ##
+        if (self.general.resized_w > ori_w) or (self.general.resized_h > ori_h):
+            if not self.general.zoom: 
+                info['size'] = (ori_w,ori_h)
+                if str2bool(self.common.print_info):
+                    print('no resize')
+                    self.print_info()
+                return image_data, info
+
+        ## resize
+        if self.general.type.lower() in ['BILINEAR',  'Bilinear',  'bilinear']:
+            image_data = self.do_resize_bilinear(image_data, self.general.resized_w, self.general.resized_h)
+        elif self.general.type.lower() in ['BICUBIC',  'Bicubic',  'bicubic']:
+            image_data = self.do_resize_bicubic(image_data, self.general.resized_w, self.general.resized_h)
+        elif self.general.type.lower() in ['CV',  'cv',  'opencv', 'OpenCV',  'CV2',  'cv2']:
+            image_data = self.do_resize_cv2(image_data, self.general.resized_w, self.general.resized_h)
+        elif self.general.type.lower() in ['FIXED',  'Fixed',  'fixed', 'FIXED_520',  'Fixed_520',  'fixed_520', 'FIXED_720', 'Fixed_720', 'fixed_720']:
+            image_data = self.do_resize_fixed(image_data, self.general.resized_w, self.general.resized_h, self.hw.resize_bit, self.general.type)
+
+       
+        # output
+        info['size'] = (self.general.resized_w, self.general.resized_h)
+
+        # print info
+        if str2bool(self.common.print_info):
+            self.print_info()
+
+        return image_data, info
+
+    def calcuate_scale_keep_ratio(self, tar_w, tar_h, ori_w, ori_h, calculate_ratio_using_CSim):
+        if not calculate_ratio_using_CSim:
+            scale_w = tar_w * 1.0 / ori_w*1.0
+            scale_h = tar_h * 1.0 / ori_h*1.0
+            scale = scale_w if scale_w < scale_h else scale_h
+            new_w = int(round(ori_w * scale))
+            new_h = int(round(ori_h * scale))
+            return new_w, new_h
+        
+        ## calculate_ratio_using_CSim
+        scale_w = c_float(tar_w * 1.0 / (ori_w * 1.0)).value
+        scale_h = c_float(tar_h * 1.0 / (ori_h * 1.0)).value
+        scale_ratio = 0.0
+        scale_target_w = 0
+        scale_target_h = 0
+        padH = 0
+        padW = 0
+
+        bScaleW = True if scale_w < scale_h else False
+        if bScaleW:
+            scale_ratio = scale_w
+            scale_target_w = int(c_float(scale_ratio * ori_w + 0.5).value)
+            scale_target_h = int(c_float(scale_ratio * ori_h + 0.5).value)
+            assert (abs(scale_target_w - tar_w) <= 1), "Error: scale down width cannot meet expectation\n"
+            padH = tar_h - scale_target_h
+            padW = 0
+            assert (padH >= 0), "Error: padH shouldn't be less than zero\n"
+        else:
+            scale_ratio = scale_h 
+            scale_target_w = int(c_float(scale_ratio * ori_w + 0.5).value)
+            scale_target_h = int(c_float(scale_ratio * ori_h + 0.5).value)
+            assert (abs(scale_target_h - tar_h) <= 1), "Error: scale down height cannot meet expectation\n"
+            padW = tar_w - scale_target_w
+            padH = 0
+            assert (padW >= 0), "Error: padW shouldn't be less than zero\n"
+        new_w = tar_w - padW
+        new_h = tar_h - padH
+        return new_w, new_h
+    
+    def do_resize_bilinear(self, image_data, resized_w, resized_h):
+        img = Image.fromarray(image_data)
+        img = img.resize((resized_w, resized_h), Image.BILINEAR)
+        image_data = np.array(img).astype('uint8')
+        return image_data        
+
+    def do_resize_bicubic(self, image_data, resized_w, resized_h):
+        img = Image.fromarray(image_data)
+        img = img.resize((resized_w, resized_h), Image.BICUBIC)
+        image_data = np.array(img).astype('uint8')
+        return image_data
+
+    def do_resize_cv2(self, image_data, resized_w, resized_h):
+        image_data = cv2.resize(image_data, (resized_w, resized_h))
+        image_data = np.array(image_data)
+        # image_data = np.array(image_data).astype('uint8')
+        return image_data
+
+    def do_resize_fixed(self, image_data, resized_w, resized_h, resize_bit, type):
+        if len(image_data.shape) < 3:
+            m, n = image_data.shape
+            tmp = np.zeros((m,n,3), dtype=np.uint8)
+            tmp[:,:,0] = image_data
+            image_data = tmp
+            c = 3
+            gray = True
+        else:
+            m, n, c = image_data.shape
+            gray = False
+
+        resolution = 1 << resize_bit
+
+        # Width
+        ratio = int(((n - 1) << resize_bit) / (resized_w - 1))
+        ratio_cnt = 0
+        src_x = 0
+        resized_image_w = np.zeros((m, resized_w, c), dtype=np.uint8)
+        
+        for dst_x in range(resized_w):
+            while ratio_cnt > resolution:
+                ratio_cnt = ratio_cnt - resolution
+                src_x = src_x + 1
+            mul1 = np.ones((m, c)) * (resolution - ratio_cnt)
+            mul2 = np.ones((m, c)) * ratio_cnt
+            resized_image_w[:, dst_x, :] = np.multiply(np.multiply(
+                image_data[:, src_x, :], mul1) + np.multiply(image_data[:, src_x + 1, :], mul2), 1/resolution)
+            ratio_cnt = ratio_cnt + ratio
+
+        # Height
+        ratio = int(((m - 1) << resize_bit) / (resized_h - 1))
+        ## NPU HW special case 2 , only on 520
+        if type.lower() in ['FIXED_520',  'Fixed_520',  'fixed_520']:
+            if (((ratio * (resized_h - 1)) % 4096 == 0) and ratio != 4096):
+                ratio -= 1
+
+        ratio_cnt = 0
+        src_x = 0
+        resized_image = np.zeros(
+            (resized_h, resized_w, c), dtype=np.uint8)
+        for dst_x in range(resized_h):
+            while ratio_cnt > resolution:
+                ratio_cnt = ratio_cnt - resolution
+                src_x = src_x + 1
+                       
+            mul1 = np.ones((resized_w, c)) * (resolution - ratio_cnt)
+            mul2 = np.ones((resized_w, c)) * ratio_cnt
+            
+            ## NPU HW special case 1 , both on 520 / 720
+            if (((dst_x > 0) and ratio_cnt == resolution) and (ratio != resolution)):
+                if type.lower() in ['FIXED_520',  'Fixed_520',  'fixed_520','FIXED_720',  'Fixed_720',  'fixed_720' ]:
+                    resized_image[dst_x, :, :] = np.multiply(np.multiply(
+                        resized_image_w[src_x+1, :, :], mul1) + np.multiply(resized_image_w[src_x + 2, :, :], mul2), 1/resolution)
+            else:
+                resized_image[dst_x, :, :] = np.multiply(np.multiply(
+                    resized_image_w[src_x, :, :], mul1) + np.multiply(resized_image_w[src_x + 1, :, :], mul2), 1/resolution)
+
+            ratio_cnt = ratio_cnt + ratio
+
+        if gray:
+            resized_image = resized_image[:,:,0]
+
+        return resized_image
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/Rotate.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/Rotate.py
@ -0,0 +1,45 @@
+import numpy as np
+from .utils import str2bool, str2int
+
+class runner(object):
+    def __init__(self, *args, **kwargs):
+        self.set = {
+            'operator': '',
+            "rotate_direction": 0,
+
+        }
+        self.update(*args, **kwargs)
+
+    def update(self, *args, **kwargs):
+        self.set.update(kwargs)
+        self.rotate_direction = str2int(self.set['rotate_direction'])
+
+        # print info
+        if str2bool(self.set['b_print']):
+            self.print_info()
+
+    def print_info(self):
+        print("<rotate>",
+            'rotate_direction', self.rotate_direction,)
+
+
+    def run(self, image_data):
+        image_data = self._rotate(image_data)
+        return image_data
+
+    def _rotate(self,img):
+        if self.rotate_direction == 1 or self.rotate_direction == 2:
+            col, row, unit = img.shape
+            pInBuf = img.reshape((-1,1))
+            pOutBufTemp = np.zeros((col* row* unit))
+            for r in range(row):
+                for c in range(col):
+                    for u in range(unit):
+                        if self.rotate_direction == 1:
+                            pOutBufTemp[unit * (c * row + (row - r - 1))+u] = pInBuf[unit * (r * col + c)+u]
+                        elif self.rotate_direction == 2:
+                            pOutBufTemp[unit * (row * (col - c - 1) + r)+u] = pInBuf[unit * (r * col + c)+u]
+
+            img = pOutBufTemp.reshape((col,row,unit))
+
+        return img
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/Runner_base.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/Runner_base.py
@ -0,0 +1,59 @@
+from abc import ABCMeta, abstractmethod
+
+class Param_base(object):
+    @abstractmethod
+    def update(self,**dic):
+        raise NotImplementedError("Must override")
+
+    def load_dic(self, key, **dic):
+        if key in dic:
+            param = eval('self.'+key)
+            param = dic[key]
+
+    def __str__(self):
+        str_out = []
+        return(' '.join(str_out))
+  
+
+class Common(Param_base):
+    print_info = False
+    model_size = [0,0]
+    numerical_type = 'floating'
+
+    def update(self, **dic):
+        self.print_info = dic['print_info']
+        self.model_size = dic['model_size']
+        self.numerical_type = dic['numerical_type']
+    
+    def __str__(self):
+        str_out = ['numerical_type:',str(self.numerical_type)]
+        return(' '.join(str_out))
+    
+class Runner_base(metaclass=ABCMeta):
+    common = Common()
+    general = Param_base()
+    floating = Param_base()
+    hw = Param_base()
+
+    def update(self, **kwargs):
+        ## update param
+        self.common.update(**kwargs['common'])
+        self.general.update(**kwargs['general'])
+        assert(self.common.numerical_type.lower() in ['floating', '520', '720'])
+        if (self.common.numerical_type == 'floating'):
+            if (self.floating.__class__.__name__ != 'Param_base'):
+                self.floating.update(**kwargs['floating'])
+        else:
+            if (self.hw.__class__.__name__ != 'Param_base'):
+                self.hw.update(**kwargs['hw'])
+
+    def print_info(self):
+        if (self.common.numerical_type == 'floating'):
+            print(self, self.common, self.general, self.floating)
+        else:
+            print(self, self.common, self.general, self.hw)
+        
+
+
+        
+
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/init.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/init.py
@ -0,0 +1,2 @@
+from . import ColorConversion, Padding, Resize, Crop, Normalize, Rotate
+
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/utils.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/utils.py
@ -0,0 +1,372 @@
+import numpy as np
+from PIL import Image
+import struct
+
+def pad_square_to_4(x_start, x_end, y_start, y_end):
+    w_int = x_end - x_start 
+    h_int = y_end - y_start
+    pad = w_int - h_int
+    if pad > 0:
+        pad_s = (pad >> 1) &(~3)
+        pad_e = pad - pad_s
+        y_start -= pad_s
+        y_end += pad_e
+    else:#//pad <=0
+        pad_s = -(((pad) >> 1) &(~3))
+        pad_e = (-pad) - pad_s
+        x_start -= pad_s
+        x_end += pad_e
+    return x_start, x_end, y_start, y_end
+
+def str_fill(value):
+    if len(value) == 1:
+        value = "0" + value
+    elif len(value) == 0:
+        value = "00"
+
+    return value
+
+def clip_ary(value):
+    list_v = []
+    for i in range(len(value)):
+        v = value[i] % 256
+        list_v.append(v)
+
+    return list_v
+    
+def str2bool(v):
+    if isinstance(v,bool):
+        return v
+    return v.lower() in ('TRUE', 'True', 'true', '1', 'T', 't', 'Y', 'YES', 'y', 'yes')
+
+
+def str2int(s):
+    if s == "":
+        s = 0
+    s = int(s)
+    return s
+
+def str2float(s):
+    if s == "":
+        s = 0
+    s = float(s)
+    return s
+
+def clip(value, mini, maxi):
+    if value < mini:
+        result = mini
+    elif value > maxi:
+        result = maxi
+    else:
+        result = value
+
+    return result
+
+
+def clip_ary(value):
+    list_v = []
+    for i in range(len(value)):
+        v = value[i] % 256
+        list_v.append(v)
+
+    return list_v
+
+
+def signed_rounding(value, bit):
+    if value < 0:
+        value = value - (1 << (bit - 1))
+    else:
+        value = value + (1 << (bit - 1))
+
+    return value
+
+def hex_loader(data_folder,**kwargs):
+    format_mode = kwargs['raw_img_fmt']
+    src_h = kwargs['img_in_height']
+    src_w = kwargs['img_in_width']
+
+    if format_mode in ['YUV444', 'yuv444', 'YCBCR444', 'YCbCr444', 'ycbcr444']:
+        output = hex_yuv444(data_folder,src_h,src_w)
+    elif format_mode in ['RGB565', 'rgb565']:
+        output = hex_rgb565(data_folder,src_h,src_w)
+    elif format_mode in ['YUV422', 'yuv422', 'YCBCR422', 'YCbCr422', 'ycbcr422']:
+        output = hex_yuv422(data_folder,src_h,src_w)
+
+    return output
+
+def hex_rgb565(hex_folder,src_h,src_w):
+    pix_per_line = 8
+    byte_per_line = 16
+
+    f = open(hex_folder)
+    pixel_r = []
+    pixel_g = []
+    pixel_b = []
+
+    # Ignore the first line
+    f.readline()
+    input_line = int((src_h * src_w)/pix_per_line)
+    for i in range(input_line):
+        readline = f.readline()
+        for j in range(int(byte_per_line/2)-1, -1, -1):
+            data1 = int(readline[(j * 4 + 0):(j * 4 + 2)], 16)
+            data0 = int(readline[(j * 4 + 2):(j * 4 + 4)], 16)
+            r = ((data1 & 0xf8) >> 3)
+            g = (((data0 & 0xe0) >> 5) + ((data1 & 0x7) << 3))
+            b = (data0 & 0x1f)
+            pixel_r.append(r)
+            pixel_g.append(g)
+            pixel_b.append(b)
+
+    ary_r = np.array(pixel_r, dtype=np.uint8)
+    ary_g = np.array(pixel_g, dtype=np.uint8)
+    ary_b = np.array(pixel_b, dtype=np.uint8)
+    output = np.concatenate((ary_r[:, None], ary_g[:, None], ary_b[:, None]), axis=1)
+    output = output.reshape((src_h, src_w, 3))
+
+    return output
+
+def hex_yuv444(hex_folder,src_h,src_w):
+    pix_per_line = 4
+    byte_per_line = 16
+
+    f = open(hex_folder)
+    byte0 = []
+    byte1 = []
+    byte2 = []
+    byte3 = []
+
+    # Ignore the first line
+    f.readline()
+    input_line = int((src_h * src_w)/pix_per_line)
+    for i in range(input_line):
+        readline = f.readline()
+        for j in range(byte_per_line-1, -1, -1):
+            data = int(readline[(j*2):(j*2+2)], 16)
+            if (j+1) % 4 == 0:
+                byte0.append(data)
+            elif (j+2) % 4 == 0:
+                byte1.append(data)
+            elif (j+3) % 4 == 0:
+                byte2.append(data)
+            elif (j+4) % 4 == 0:
+                byte3.append(data)
+    # ary_a = np.array(byte0, dtype=np.uint8)
+    ary_v = np.array(byte1, dtype=np.uint8)
+    ary_u = np.array(byte2, dtype=np.uint8)
+    ary_y = np.array(byte3, dtype=np.uint8)
+    output = np.concatenate((ary_y[:, None], ary_u[:, None], ary_v[:, None]), axis=1)
+    output = output.reshape((src_h, src_w, 3))
+
+    return output
+
+def hex_yuv422(hex_folder,src_h,src_w):
+    pix_per_line = 8
+    byte_per_line = 16
+    f = open(hex_folder)
+    pixel_y = []
+    pixel_u = []
+    pixel_v = []
+
+    # Ignore the first line
+    f.readline()
+    input_line = int((src_h * src_w)/pix_per_line)
+    for i in range(input_line):
+        readline = f.readline()
+        for j in range(int(byte_per_line/4)-1, -1, -1):
+            data3 = int(readline[(j * 8 + 0):(j * 8 + 2)], 16)
+            data2 = int(readline[(j * 8 + 2):(j * 8 + 4)], 16)
+            data1 = int(readline[(j * 8 + 4):(j * 8 + 6)], 16)
+            data0 = int(readline[(j * 8 + 6):(j * 8 + 8)], 16)
+            pixel_y.append(data3)
+            pixel_y.append(data1)
+            pixel_u.append(data2)
+            pixel_u.append(data2)
+            pixel_v.append(data0)
+            pixel_v.append(data0)
+
+    ary_y = np.array(pixel_y, dtype=np.uint8)
+    ary_u = np.array(pixel_u, dtype=np.uint8)
+    ary_v = np.array(pixel_v, dtype=np.uint8)
+    output = np.concatenate((ary_y[:, None], ary_u[:, None], ary_v[:, None]), axis=1)
+    output = output.reshape((src_h, src_w, 3))
+
+    return output
+
+def bin_loader(data_folder,**kwargs):
+    format_mode = kwargs['raw_img_fmt']
+    src_h = kwargs['img_in_height']
+    src_w = kwargs['img_in_width']
+    if format_mode in ['YUV','yuv','YUV444', 'yuv444', 'YCBCR','YCbCr','ycbcr','YCBCR444', 'YCbCr444', 'ycbcr444']:
+        output = bin_yuv444(data_folder,src_h,src_w)
+    elif format_mode in ['RGB565', 'rgb565']:
+        output = bin_rgb565(data_folder,src_h,src_w)
+    elif format_mode in ['NIR', 'nir','NIR888', 'nir888']:
+        output = bin_nir(data_folder,src_h,src_w)
+    elif format_mode in ['YUV422', 'yuv422', 'YCBCR422', 'YCbCr422', 'ycbcr422']:
+        output = bin_yuv422(data_folder,src_h,src_w)
+    elif format_mode in ['RGB888','rgb888']:
+        output = np.fromfile(data_folder, dtype='uint8')
+        output = output.reshape(src_h,src_w,3)
+    elif format_mode in ['RGBA8888','rgba8888', 'RGBA' , 'rgba']:
+        output_temp = np.fromfile(data_folder, dtype='uint8')
+        output_temp = output_temp.reshape(src_h,src_w,4)
+        output = output_temp[:,:,0:3]
+
+    return output
+
+def bin_yuv444(in_img_path,src_h,src_w):
+    # load bin
+    struct_fmt = '1B' 
+    struct_len = struct.calcsize(struct_fmt)
+    struct_unpack = struct.Struct(struct_fmt).unpack_from
+    
+    row = src_h
+    col = src_w
+    pixels = row*col
+
+    raw = []
+    with open(in_img_path, "rb") as f:
+        while True:
+            data = f.read(struct_len)
+            if not data: break
+            s = struct_unpack(data)
+            raw.append(s[0])
+    
+
+    raw = raw[:pixels*4]
+
+    #
+    output = np.zeros((pixels * 3), dtype=np.uint8)
+    cnt = 0
+    for i in range(0, pixels*4, 4):
+        #Y
+        output[cnt] = raw[i+3]
+        #U
+        cnt += 1
+        output[cnt] = raw[i+2]
+        #V
+        cnt += 1
+        output[cnt] = raw[i+1]
+
+        cnt += 1          
+
+    output = output.reshape((src_h,src_w,3))
+    return output
+    
+def bin_yuv422(in_img_path,src_h,src_w):
+    # load bin
+    struct_fmt = '1B' 
+    struct_len = struct.calcsize(struct_fmt)
+    struct_unpack = struct.Struct(struct_fmt).unpack_from
+    
+    row = src_h
+    col = src_w
+    pixels = row*col
+
+    raw = []
+    with open(in_img_path, "rb") as f:
+        while True:
+            data = f.read(struct_len)
+            if not data: break
+            s = struct_unpack(data)
+            raw.append(s[0])
+    
+
+    raw = raw[:pixels*2]
+
+    #
+    output = np.zeros((pixels * 3), dtype=np.uint8)
+    cnt = 0
+    for i in range(0, pixels*2, 4):
+        #Y0
+        output[cnt] = raw[i+3]
+        #U0
+        cnt += 1
+        output[cnt] = raw[i+2]
+        #V0
+        cnt += 1
+        output[cnt] = raw[i]
+        #Y1
+        cnt += 1
+        output[cnt] = raw[i+1]
+        #U1
+        cnt += 1
+        output[cnt] = raw[i+2]
+        #V1
+        cnt += 1
+        output[cnt] = raw[i]
+
+        cnt += 1          
+
+    output = output.reshape((src_h,src_w,3))
+    return output
+
+def bin_rgb565(in_img_path,src_h,src_w):
+    # load bin
+    struct_fmt = '1B' 
+    struct_len = struct.calcsize(struct_fmt)
+    struct_unpack = struct.Struct(struct_fmt).unpack_from
+    
+    row = src_h
+    col = src_w
+    pixels = row*col
+
+    rgba565 = []
+    with open(in_img_path, "rb") as f:
+        while True:
+            data = f.read(struct_len)
+            if not data: break
+            s = struct_unpack(data)
+            rgba565.append(s[0])
+    
+
+    rgba565 = rgba565[:pixels*2]
+
+    # rgb565_bin to numpy_array
+    output = np.zeros((pixels * 3), dtype=np.uint8)
+    cnt = 0
+    for i in range(0, pixels*2, 2):
+        temp = rgba565[i]
+        temp2 = rgba565[i+1]
+        #R-5
+        output[cnt] = (temp2 >>3)
+        
+        #G-6
+        cnt += 1
+        output[cnt] = ((temp & 0xe0) >> 5) + ((temp2 & 0x07) << 3)
+        
+        #B-5
+        cnt += 1
+        output[cnt] = (temp & 0x1f)
+
+        cnt += 1          
+
+    output = output.reshape((src_h,src_w,3))
+    return output
+
+def bin_nir(in_img_path,src_h,src_w):
+    # load bin
+    struct_fmt = '1B' 
+    struct_len = struct.calcsize(struct_fmt)
+    struct_unpack = struct.Struct(struct_fmt).unpack_from
+
+    nir = []
+    with open(in_img_path, "rb") as f:
+        while True:
+            data = f.read(struct_len)
+            if not data: break
+            s = struct_unpack(data)
+            nir.append(s[0])
+            
+    nir = nir[:src_h*src_w]
+    pixels = len(nir)
+    # nir_bin to numpy_array
+    output = np.zeros((len(nir) * 3), dtype=np.uint8)
+    for i in range(0, pixels):
+        output[i*3]=nir[i]
+        output[i*3+1]=nir[i]
+        output[i*3+2]=nir[i]
+
+    output = output.reshape((src_h,src_w,3))
+    return output
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/utils_520.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/utils_520.py
@ -0,0 +1,50 @@
+import math
+
+def round_up_16(num):
+    return ((num + (16 - 1)) & ~(16 - 1))
+
+def round_up_n(num, n):
+    if (num > 0):
+        temp = float(num) / n
+        return math.ceil(temp) * n
+    else:
+        return -math.ceil(float(-num) / n) * n
+
+def cal_img_row_offset(crop_num, pad_num, start_row, out_row, orig_row):
+
+    scaled_img_row = int(out_row - (pad_num[1] + pad_num[3]))
+    if ((start_row - pad_num[1]) > 0):
+        img_str_row = int((start_row - pad_num[1]))
+    else:
+        img_str_row = 0
+    valid_row = int(orig_row - (crop_num[1] + crop_num[3]))
+    img_str_row = int(valid_row * img_str_row / scaled_img_row)
+    return int(img_str_row + crop_num[1])
+
+def get_pad_num(pad_num_orig, left, up, right, bottom):
+    pad_num = [0]*4
+    for i in range(0,4):
+        pad_num[i] = pad_num_orig[i]
+
+    if not (left):
+        pad_num[0] = 0
+    if not (up):
+        pad_num[1] = 0
+    if not (right):
+        pad_num[2] = 0
+    if not (bottom):
+        pad_num[3] = 0
+
+    return pad_num
+
+def get_byte_per_pixel(raw_fmt):
+    if raw_fmt.lower() in ['RGB888', 'rgb888', 'RGB', 'rgb888']:
+        return 4
+    elif raw_fmt.lower() in ['YUV', 'yuv', 'YUV422', 'yuv422']:
+        return 2
+    elif raw_fmt.lower() in ['RGB565', 'rgb565']:
+        return 2
+    elif raw_fmt.lower() in ['NIR888', 'nir888', 'NIR', 'nir']:
+        return 1
+    else:
+        return -1
--- a/kneron/exporting/yolov5/kneron_preprocessing/funcs/utils_720.py
+++ b/kneron/exporting/yolov5/kneron_preprocessing/funcs/utils_720.py
@ -0,0 +1,42 @@
+import numpy as np
+from PIL import Image
+
+def twos_complement(value):
+    value = int(value)
+    # msb = (value & 0x8000) * (1/np.power(2, 15))
+    msb = (value & 0x8000) >> 15
+    if msb == 1:
+        if (((~value) & 0xFFFF) + 1) >= 0xFFFF:
+            result = ((~value) & 0xFFFF)
+        else:
+            result = (((~value) & 0xFFFF) + 1)
+        result = result * (-1)
+    else:
+        result = value
+
+    return result
+
+
+def twos_complement_pix(value):
+    h, _ = value.shape
+    for i in range(h):
+        value[i, 0] = twos_complement(value[i, 0])
+
+    return value
+
+def clip(value, mini, maxi):
+    if value < mini:
+        result = mini
+    elif value > maxi:
+        result = maxi
+    else:
+        result = value
+
+    return result
+
+def clip_pix(value, mini, maxi):
+    h, _ = value.shape
+    for i in range(h):
+        value[i, 0] = clip(value[i, 0], mini, maxi)
+
+    return value
--- a/kneron/exporting/yolov5/quantize_yolov5.py
+++ b/kneron/exporting/yolov5/quantize_yolov5.py
@ -0,0 +1,45 @@
+import os
+import numpy as np
+import torch
+import ktc  # Kneron Toolchain
+from yolov5_preprocess import Yolov5_preprocess  # use the project's own preprocessing
+import kneron_preprocessing
+
+# Select the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Input image size (same as used for training)
+imgsz_h, imgsz_w = 640, 640
+
+# Directory of the quantization dataset (make sure this folder exists)
+data_path = "/data50"
+img_list = []
+
+# ONNX model path (make sure this path is valid inside the Docker container)
+onnx_model_path = "/workspace/yolov5/latest.opt.onnx"
+
+# Initialize the Kneron ModelConfig object
+km = ktc.ModelConfig(20008, "0001", "720", onnx_model=onnx_model_path)
+
+# Walk through the dataset directory and preprocess every file found
+for root, _, files in os.walk(data_path):
+    for f in files:
+        fullpath = os.path.join(root, f)
+        
+        # Apply the same preprocessing as in training
+        img_data, _ = Yolov5_preprocess(fullpath, device, imgsz_h, imgsz_w)
+
+        # Make sure the data is a NumPy array
+        img_data = img_data.cpu().numpy()
+
+        print(f"Processed: {fullpath}")
+        img_list.append(img_data)
+
+# Convert to NumPy format
+img_list = np.array(img_list)
+
+# Run the BIE quantization analysis
+bie_model_path = km.analysis({"input": img_list})
+
+# Print the success message
+print("\n✅ Fixed-point analysis done! BIE model saved to:", bie_model_path)
--- a/kneron/exporting/yolov5/readme.txt
+++ b/kneron/exporting/yolov5/readme.txt
@ -0,0 +1,68 @@
+(1)yolov5_app.py for plotting model inference results
+cd applications
+python yolov5_app.py
+
+(2)yolov5_evaluation.py for evaluating model mAP at hw_repo
+cd applications
+python yolov5_evaluation.py
+
+#mAP @ yolov5s_v2_op9_sig_batch1_input05_640x640_nearest_convert.onnx (with nearest upsampling)
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.346
+ Average Precision  (AP) @[ IoU=0.50      | area=   all | maxDets=100 ] = 0.533
+ Average Precision  (AP) @[ IoU=0.75      | area=   all | maxDets=100 ] = 0.372
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.196
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.391
+ Average Precision  (AP) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.442
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=  1 ] = 0.279
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets= 10 ] = 0.456
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=   all | maxDets=100 ] = 0.503
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= small | maxDets=100 ] = 0.320
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area=medium | maxDets=100 ] = 0.557
+ Average Recall     (AR) @[ IoU=0.50:0.95 | area= large | maxDets=100 ] = 0.618
+
+(3)yolov5_compare_pth_onnx.py for comparing the results of the PyTorch model and the ONNX model
+cd applications
+python yolov5_compare_pth_onnx.py
+
+(4)v2 model is in the below link.
+10.200.210.221:/mnt/models/Object_models/YOLOv5/yolov5s_v2_state_dict_input05.pt
+10.200.210.221:/mnt/models/Object_models/YOLOv5/yolov5s_v2_op9_sig_batch1_input05_640x640_nearest_convert.onnx
+
+(5)the parameters setting
+(5.1)In order to get high mAP in coco val2017, please use 
+101620_yolov5_init_params.json
+{
+    "model_path": "/mnt/models/Object_models/YOLOv5/yolov5s_v2_state_dict_input05.pt",
+    "grid20_path": "/mnt/models/Object_models/YOLOv5/20_640x640.npy",
+    "grid40_path": "/mnt/models/Object_models/YOLOv5/40_640x640.npy",
+    "grid80_path": "/mnt/models/Object_models/YOLOv5/80_640x640.npy",
+    "num_classes": 80,
+    "imgsz_h": 640,
+	"imgsz_w": 640,
+	"conf_thres": 0.001,
+	"iou_thres": 0.65,
+    "top_k_num": 3000
+}
+
+
+(5.2)For video usage scenarios, please use
+102320_yolov5_init_params.json
+{
+    "model_path": "/mnt/models/Object_models/YOLOv5/yolov5s_v2_state_dict_input05.pt",
+    "grid20_path": "/mnt/models/Object_models/YOLOv5/20_640x352.npy",
+    "grid40_path": "/mnt/models/Object_models/YOLOv5/40_640x352.npy",
+    "grid80_path": "/mnt/models/Object_models/YOLOv5/80_640x352.npy",
+    "num_classes": 80,
+    "imgsz_h": 352,
+	"imgsz_w": 640,
+	"conf_thres": 0.3,
+	"iou_thres": 0.5,
+    "top_k_num": 3000
+}
+
+(5.3)The differences between the two settings above are:
+(5.3.1) Video uses a 640w*352h input so that inference runs faster.
+COCO contains both tall and wide images, so a square 640w*640h input is the better choice.
+ 
+(5.3.2) When evaluating on COCO val2017 with the official YOLOv5 settings, the confidence threshold is low ("conf_thres": 0.001) and the NMS IoU threshold is high ("iou_thres": 0.65), which gives a better mAP.
+For video, set "conf_thres": 0.3 so that there are not too many false positives, and set the NMS threshold to "iou_thres": 0.5, which is more friendly to objects that are close together.
--- a/kneron/exporting/yolov5/yolo_v2.py
+++ b/kneron/exporting/yolov5/yolo_v2.py
@ -0,0 +1,164 @@
+import argparse
+from copy import deepcopy
+import torch
+#from experimental import *
+from .common import *
+#from .common_v3 import *
+from pathlib import Path
+import math
+import yaml
+
+class Detect(nn.Module):
+    """Detection head: one 1x1 convolution per input feature map, followed by a sigmoid.
+
+    This variant neither reshapes the outputs nor decodes boxes (that code is
+    commented out in ``forward``); it returns the activated convolution
+    outputs unchanged.
+    """
+
+    def __init__(self, nc=80, anchors=(), ch=()):  # detection layer
+        """Build the per-layer output convolutions and register the anchor buffers.
+
+        Args:
+            nc: number of classes.
+            anchors: one sequence per detection layer holding flattened pairs
+                of values; ``anchors[0]`` must exist because half its length
+                is taken as the number of anchors per layer.
+            ch: input channel count for each detection layer's convolution.
+        """
+        super(Detect, self).__init__()
+        self.stride = None  # strides computed during build
+        self.nc = nc  # number of classes
+        self.no = nc + 5  # number of outputs per anchor
+        self.nl = len(anchors)  # number of detection layers
+        self.na = len(anchors[0]) // 2  # number of anchors
+        self.grid = [torch.zeros(1)] * self.nl  # init grid
+        a = torch.tensor(anchors).float().view(self.nl, -1, 2)
+        self.register_buffer('anchors', a)  # shape(nl,na,2)
+        self.register_buffer('anchor_grid', a.clone().view(self.nl, 1, -1, 1, 1, 2))  # shape(nl,1,na,1,1,2)
+        self.m = nn.ModuleList(nn.Conv2d(x, self.no * self.na, 1) for x in ch)  # output conv
+        self.export = False  # onnx export
+
+    def forward(self, x):
+        """Apply each layer's convolution and a sigmoid to the matching entry of ``x``.
+
+        Args:
+            x: mutable, indexable sequence with at least ``self.nl`` feature
+                maps; its entries are overwritten in place.
+
+        Returns:
+            The same ``x`` object, now holding the activated outputs.
+        """
+        # x = x.copy()  # for profiling
+        z = []  # inference output (never filled or returned in this variant)
+        self.training |= self.export  # once export is True this leaves self.training set to True
+        for i in range(self.nl):
+            x[i] = self.m[i](x[i])  # conv
+            
+            # bs, _, ny, nx = x[i].shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
+            # x[i] = x[i].view(bs, self.na, self.no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+            x[i] = x[i].sigmoid()
+
+
+
+        # return x if self.training else (torch.cat(z, 1), x)
+        return x
+
+    @staticmethod
+    def _make_grid(nx=20, ny=20):
+        """Return a float tensor of shape (1, 1, ny, nx, 2) holding (x, y) cell indices."""
+        yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
+        return torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+
+class Model(nn.Module):
+    def __init__(self, cfg='yolov5s.yaml', ch=3, nc=None):  # model, input channels, number of classes
+        super(Model, self).__init__()   
+        with open(cfg) as f:
+            self.yaml = yaml.load(f, Loader=yaml.FullLoader)  # model dict
+
+        # Define model
+        if nc and nc != self.yaml['nc']:
+            print('Overriding %s nc=%g with nc=%g' % (cfg, self.yaml['nc'], nc))
+            self.yaml['nc'] = nc  # override yaml value
+        self.model, self.save = parse_model(deepcopy(self.yaml), ch=[ch])  # model, savelist, ch_out
+        # print([x.shape for x in self.forward(torch.zeros(1, ch, 64, 64))])
+
+        # Build strides, anchors
+        m = self.model[-1]  # Detect()
+        if isinstance(m, Detect):
+            s = 128  # 2x min stride
+            #m.stride = torch.tensor([s / x.shape[-2] for x in self.forward(torch.zeros(1, ch, s, s))])  # forward
+            # FocusNoSliceCat
+            m.stride = torch.tensor([8.0,16.0,32.0])  # forward
+            m.anchors /= m.stride.view(-1, 1, 1)
+            check_anchor_order(m)
+            self.stride = m.stride
+            self._initialize_biases()  # only run once
+            # print('Strides: %s' % m.stride.tolist())
+
+        # Init weights, biases
+        initialize_weights(self)
+
+
+    def forward(self, x, augment=False, profile=False):
+        y, dt = [], []  # outputs
+        for m in self.model:
+            if m.f != -1:  # if not from previous layer
+                x = y[m.f] if isinstance(m.f, int) else [x if j == -1 else y[j] for j in m.f]  # from earlier layers
+            x = m(x)  # run
+            y.append(x if m.i in self.save else None)  # save output
+        return x
+
+    def _initialize_biases(self, cf=None):  # initialize biases into Detect(), cf is class frequency
+        # cf = torch.bincount(torch.tensor(np.concatenate(dataset.labels, 0)[:, 0]).long(), minlength=nc) + 1.
+        m = self.model[-1]  # Detect() module
+        for mi, s in zip(m.m, m.stride):  #  from
+            b = mi.bias.view(m.na, -1)  # conv.bias(255) to (3,85)
+            b[:, 4] += math.log(8 / (640 / s) ** 2)  # obj (8 objects per 640 image)
+            b[:, 5:] += math.log(0.6 / (m.nc - 0.99)) if cf is None else torch.log(cf / cf.sum())  # cls
+            mi.bias = torch.nn.Parameter(b.view(-1), requires_grad=True)
+
+def make_divisible(x, divisor):
+    # Returns x evenly divisble by divisor
+    return math.ceil(x / divisor) * divisor
+
+def check_anchor_order(m):
+    # Check anchor order against stride order for YOLOv5 Detect() module m, and correct if necessary
+    a = m.anchor_grid.prod(-1).view(-1)  # anchor area
+    da = a[-1] - a[0]  # delta a
+    ds = m.stride[-1] - m.stride[0]  # delta s
+    if da.sign() != ds.sign():  # same order
+        print('Reversing anchor order')
+        m.anchors[:] = m.anchors.flip(0)
+        m.anchor_grid[:] = m.anchor_grid.flip(0)
+
+def initialize_weights(model):
+    for m in model.modules():
+        t = type(m)
+        if t is nn.Conv2d:
+            pass  # nn.init.kaiming_normal_(m.weight, mode='fan_out', nonlinearity='relu')
+        elif t is nn.BatchNorm2d:
+            m.eps = 1e-3
+            m.momentum = 0.03
+        elif t in [nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
+            m.inplace = True
+
+def parse_model(d, ch):  # model_dict, input_channels(3) #original
+    """Build the layer stack described by a model dictionary.
+
+    Args:
+        d: model dict providing 'anchors', 'nc', 'depth_multiple',
+            'width_multiple', 'backbone' and 'head'; the last two are lists of
+            (from, number, module, args) entries.
+        ch: list whose last element is the input channel count; the output
+            channel count of every layer is appended to it (mutated in place).
+
+    Returns:
+        Tuple ``(nn.Sequential of the layers, sorted list of layer indices
+        whose outputs later layers read)``.
+    """
+    #print('\n%3s%18s%3s%10s  %-40s%-30s' % ('', 'from', 'n', 'params', 'module', 'arguments'))
+    anchors, nc, gd, gw = d['anchors'], d['nc'], d['depth_multiple'], d['width_multiple']
+    na = (len(anchors[0]) // 2) if isinstance(anchors, list) else anchors  # number of anchors
+    no = na * (nc + 5)  # number of outputs = anchors * (classes + 5)
+
+    layers, save, c2 = [], [], ch[-1]  # layers, savelist, ch out
+    for i, (f, n, m, args) in enumerate(d['backbone'] + d['head']):  # from, number, module, args
+        # NOTE(review): eval() runs strings taken from the config; only use trusted model files.
+        m = eval(m) if isinstance(m, str) else m  # eval strings
+        for j, a in enumerate(args):
+            try:
+                args[j] = eval(a) if isinstance(a, str) else a  # eval strings
+            except:
+                # Strings that do not evaluate are kept verbatim as plain string arguments.
+                pass
+
+        n = max(round(n * gd), 1) if n > 1 else n  # depth gain
+        if m in [nn.Conv2d, Conv, Bottleneck, SPP, DWConv,  Focus,  BottleneckCSP]:
+            #print('*m',m)
+            c1, c2 = ch[f], args[0]
+            # Apply the width multiplier, except when c2 equals the detection output width `no`.
+            c2 = make_divisible(c2 * gw, 8) if c2 != no else c2
+            args = [c1, c2, *args[1:]]
+            if m in [BottleneckCSP]:
+                # The repeat count becomes a constructor argument instead of stacking n modules.
+                args.insert(2, n)
+                n = 1
+        elif m is nn.BatchNorm2d:
+            args = [ch[f]]
+        elif m is Concat:
+            # ch carries the input channels at index 0, hence the +1 offset for absolute indices.
+            c2 = sum([ch[-1 if x == -1 else x + 1] for x in f])
+        elif m is Detect:
+            args.append([ch[x + 1] for x in f])  # input channels of every detection layer
+            if isinstance(args[1], int):  # number of anchors
+                args[1] = [list(range(args[1] * 2))] * len(f)
+        else:
+            c2 = ch[f]
+
+        m_ = nn.Sequential(*[m(*args) for _ in range(n)]) if n > 1 else m(*args)  # module
+        t = str(m)[8:-2].replace('__main__.', '')  # module type
+        np = sum([x.numel() for x in m_.parameters()])  # number params (local name, not numpy)
+        m_.i, m_.f, m_.type, m_.np = i, f, t, np  # attach index, 'from' index, type, number params
+        #print('%3s%18s%3s%10.0f  %-40s%-30s' % (i, f, n, np, t, args))  # print
+        save.extend(x % i for x in ([f] if isinstance(f, int) else f) if x != -1)  # append to savelist
+        layers.append(m_)
+        ch.append(c2)
+    return nn.Sequential(*layers), sorted(save)
+
--- a/kneron/exporting/yolov5/yolov5_postprocess.py
+++ b/kneron/exporting/yolov5/yolov5_postprocess.py
@ -0,0 +1,334 @@
+# coding: utf-8
+import torch
+import torchvision
+import time
+import numpy as np
+import sys
+np.set_printoptions(threshold=sys.maxsize)
+def box_iou(box1, box2):
+    # https://github.com/pytorch/vision/blob/master/torchvision/ops/boxes.py
+    """
+    Return intersection-over-union (Jaccard index) of boxes.
+    Both sets of boxes are expected to be in (x1, y1, x2, y2) format.
+    Arguments:
+        box1 (Tensor[N, 4])
+        box2 (Tensor[M, 4])
+    Returns:
+        iou (Tensor[N, M]): the NxM matrix containing the pairwise
+            IoU values for every element in boxes1 and boxes2
+    """
+
+    def box_area(box):
+        # box = 4xn
+        return (box[2] - box[0]) * (box[3] - box[1])
+
+    area1 = box_area(box1.t())
+    area2 = box_area(box2.t())
+
+    # inter(N,M) = (rb(N,M,2) - lt(N,M,2)).clamp(0).prod(2)
+    inter = (torch.min(box1[:, None, 2:], box2[:, 2:]) - torch.max(box1[:, None, :2], box2[:, :2])).clamp(0).prod(2)
+    return inter / (area1[:, None] + area2 - inter)  # iou = inter / (area1 + area2 - inter)
+
+def xywh2xyxy(x):
+    # Convert nx4 boxes from [x, y, w, h] to [x1, y1, x2, y2] where xy1=top-left, xy2=bottom-right
+    y = torch.zeros_like(x) if isinstance(x, torch.Tensor) else np.zeros_like(x)
+    y[:, 0] = x[:, 0] - x[:, 2] / 2  # top left x
+    y[:, 1] = x[:, 1] - x[:, 3] / 2  # top left y
+    y[:, 2] = x[:, 0] + x[:, 2] / 2  # bottom right x
+    y[:, 3] = x[:, 1] + x[:, 3] / 2  # bottom right y
+    return y
+
+def non_max_suppression(prediction, conf_thres=0.1, iou_thres=0.6, top_k_num=3000, merge=False, classes=None, agnostic=False):
+    """Performs Non-Maximum Suppression (NMS) on inference results
+
+    Args:
+        prediction: tensor indexed as [image, box, 5 + nc]; columns 0-3 are
+            passed to ``xywh2xyxy``, column 4 is the objectness score and the
+            remaining columns are per-class scores.
+        conf_thres: threshold applied first to objectness, then to
+            objectness * class score.
+        iou_thres: IoU threshold passed to ``torchvision.ops.boxes.nms``.
+        top_k_num: number of highest-scoring candidates kept before NMS.
+        merge: not used by this implementation (merge-NMS is commented out).
+        classes: not used by this implementation.
+        agnostic: when True, boxes of different classes may suppress each other.
+
+    Returns:
+         list with one entry per image: detections with shape nx6
+         (x1, y1, x2, y2, conf, cls), or None when the image produced no
+         detections or was not reached before the time limit.
+    """
+    # print('conf_thres',conf_thres)
+    if prediction.dtype is torch.float16:
+        prediction = prediction.float()  # to FP32
+
+    nc = prediction[0].shape[1] - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Settings
+    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
+    max_det = 300  # maximum number of detections per image
+    time_limit = 10.0  # seconds to quit after
+    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)
+
+    t = time.time()
+    output = [None] * prediction.shape[0]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence (boolean-mask indexing, so x is a copy from here on)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            # One output row per (box, class) pair above the threshold.
+            i, j = (x[:, 5:] > conf_thres).nonzero().t()
+            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+        else:  # best class only
+            conf, j = x[:, 5:].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+
+
+
+        # If none remain process next image
+        n = x.shape[0]  # number of boxes
+        if not n:
+            continue
+
+        # Sort by confidence
+        # x = x[x[:, 4].argsort(descending=True)]
+
+        # Batched NMS
+        # Shifting boxes by class index * max_wh keeps different classes from overlapping,
+        # unless agnostic is set (offset 0).
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        # # Sort by confidence
+        ind_Sort_by_confidence = x[:, 4].argsort(descending=True)
+        boxes = boxes[ind_Sort_by_confidence][:top_k_num] #
+        scores = scores[ind_Sort_by_confidence][:top_k_num] #
+        x = x[ind_Sort_by_confidence][:top_k_num] #
+        # cross classes nms
+        i = torchvision.ops.boxes.nms(boxes, scores, iou_thres)
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+        # if merge and (1 < n < 3E3):  # Merge NMS (boxes merged using weighted mean)
+        #     update boxes as boxes(i,4) = weights(i,n) * boxes(n,4)
+        #     iou = box_iou(boxes[i], boxes) > iou_thres  # iou matrix
+        #     weights = iou * scores[None]  # box weights
+        #     x[i, :4] = torch.mm(weights, x[:, :4]).float() / weights.sum(1, keepdim=True)  # merged boxes
+        #     if redundant:
+        #         i = i[iou.sum(1) > 1]  # require redundancy
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            break  # time limit exceeded
+
+    return output
+
+def non_max_suppression_kneron(prediction, conf_thres=0.1, iou_thres=0.6, top_k_num=3000, merge=False, classes=None, agnostic=False):
+    """Performs Non-Maximum Suppression (NMS) on inference results
+
+    Args:
+        prediction: tensor indexed as [image, box, 5 + nc]; columns 0-3 are
+            passed to ``xywh2xyxy``, column 4 is the objectness score and the
+            remaining columns are per-class scores.
+        conf_thres: threshold applied first to objectness, then to
+            objectness * class score.
+        iou_thres: IoU threshold passed to ``torchvision.ops.boxes.nms``.
+        top_k_num: number of highest-scoring candidates kept before NMS.
+        merge: not used by this implementation.
+        classes: not used by this implementation.
+        agnostic: when True, boxes of different classes may suppress each other.
+
+    Returns:
+         list with one entry per image: detections with shape nx6
+         (x1, y1, x2, y2, conf, cls), or None when the image produced no
+         detections or was not reached before the time limit.
+    """
+    if prediction.dtype is torch.float16:
+        prediction = prediction.float()  # to FP32
+
+    nc = prediction[0].shape[1] - 5  # number of classes
+    xc = prediction[..., 4] > conf_thres  # candidates
+
+    # Settings
+    min_wh, max_wh = 2, 4096  # (pixels) minimum and maximum box width and height
+    max_det = 300  # maximum number of detections per image
+    time_limit = 10.0  # seconds to quit after
+    multi_label = nc > 1  # multiple labels per box (adds 0.5ms/img)
+
+    t = time.time()
+    output = [None] * prediction.shape[0]
+    for xi, x in enumerate(prediction):  # image index, image inference
+        # Apply constraints
+        # x[((x[..., 2:4] < min_wh) | (x[..., 2:4] > max_wh)).any(1), 4] = 0  # width-height
+        x = x[xc[xi]]  # confidence (boolean-mask indexing, so x is a copy from here on)
+
+        # If none remain process next image
+        if not x.shape[0]:
+            continue
+
+        # Compute conf
+        x[:, 5:] *= x[:, 4:5]  # conf = obj_conf * cls_conf
+
+        # Box (center x, center y, width, height) to (x1, y1, x2, y2)
+        box = xywh2xyxy(x[:, :4])
+
+        # Detections matrix nx6 (xyxy, conf, cls)
+        if multi_label:
+            # One output row per (box, class) pair above the threshold.
+            i, j = (x[:, 5:] > conf_thres).nonzero().t()
+            x = torch.cat((box[i], x[i, j + 5, None], j[:, None].float()), 1)
+        else:  # best class only
+            conf, j = x[:, 5:].max(1, keepdim=True)
+            x = torch.cat((box, conf, j.float()), 1)[conf.view(-1) > conf_thres]
+
+
+
+        # If none remain process next image
+        n = x.shape[0]  # number of boxes
+        if not n:
+            continue
+
+        # Sort by confidence
+        # x = x[x[:, 4].argsort(descending=True)]
+
+        # Batched NMS
+        # Shifting boxes by class index * max_wh keeps different classes from overlapping,
+        # unless agnostic is set (offset 0).
+        c = x[:, 5:6] * (0 if agnostic else max_wh)  # classes
+        boxes, scores = x[:, :4] + c, x[:, 4]  # boxes (offset by class), scores
+        # Sort by confidence
+        ind_Sort_by_confidence = x[:, 4].argsort(descending=True)
+        boxes = boxes[ind_Sort_by_confidence][:top_k_num] #
+        scores = scores[ind_Sort_by_confidence][:top_k_num] #
+        x = x[ind_Sort_by_confidence][:top_k_num] #
+        # cross classes nms
+        i = torchvision.ops.boxes.nms(boxes, scores, iou_thres)
+        if i.shape[0] > max_det:  # limit detections
+            i = i[:max_det]
+
+
+        output[xi] = x[i]
+        if (time.time() - t) > time_limit:
+            break  # time limit exceeded
+
+    return output
+
+def clip_coords(boxes, img_shape):
+    # Clip bounding xyxy bounding boxes to image shape (height, width)
+    boxes[:, 0].clamp_(0, img_shape[1])  # x1
+    boxes[:, 1].clamp_(0, img_shape[0])  # y1
+    boxes[:, 2].clamp_(0, img_shape[1])  # x2
+    boxes[:, 3].clamp_(0, img_shape[0])  # y2
+
+def scale_coords_ori(img1_shape, coords, img0_shape, ratio_pad=None):
+    # Rescale coords (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
+        pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0][0]
+        pad = ratio_pad[1]
+
+    coords[:, [0, 2]] -= pad[0]  # x padding
+    coords[:, [1, 3]] -= pad[1]  # y padding
+    coords[:, :4] /= gain
+    clip_coords(coords, img0_shape)
+    return coords
+
+def scale_coords(img1_shape, coords, img0_shape, ratio_pad=None):
+    # Rescale coords (xyxy) from img1_shape to img0_shape
+    if ratio_pad is None:  # calculate from img0_shape
+        gain = min(img1_shape[0] / img0_shape[0], img1_shape[1] / img0_shape[1])  # gain  = old / new
+        #pad = (img1_shape[1] - img0_shape[1] * gain) / 2, (img1_shape[0] - img0_shape[0] * gain) / 2  # wh padding
+    else:
+        gain = ratio_pad[0][0]
+        #pad = ratio_pad[1]
+
+    # coords[:, [0, 2]] -= pad[0]  # x padding
+    # coords[:, [1, 3]] -= pad[1]  # y padding
+    coords[:, :4] /= gain
+    clip_coords(coords, img0_shape)
+    return coords
+
+def scale_coords_test(img1_shape, coords, img0_shape, ratio_pad=None):
+
+    coords[:, 0] /= (img1_shape[1] / img0_shape[1])
+    coords[:, 2] /= (img1_shape[1] / img0_shape[1])
+    coords[:, 1] /= (img1_shape[0] / img0_shape[0])
+    coords[:, 3] /= (img1_shape[0] / img0_shape[0])
+    clip_coords(coords, img0_shape)
+    return coords
+
+def classes_mapping(det, num_classes):
+    det[:, 5] = det[:, 5] + 1.0
+
+def Yolov5_postprocess(pred, img_shape, im0_shape, conf_thres, iou_thres, top_k_num, num_classes, vanish_point, e2e_coco) :
+    """Run NMS on decoded predictions and convert the result to a list of (x, y, w, h, conf, cls) rows.
+
+    Args:
+        pred: decoded predictions handed to ``non_max_suppression``.
+        img_shape: shape of the network input; ``img_shape[2:]`` is used as
+            its (height, width) - presumably an (N, C, H, W) shape, confirm
+            against the caller.
+        im0_shape: shape of the original image; index 0 is used as its height.
+        conf_thres, iou_thres, top_k_num: forwarded to ``non_max_suppression``.
+        num_classes: forwarded to ``classes_mapping``.
+        vanish_point: fraction of the original image height; detections whose
+            y2 lies above that line are discarded.
+        e2e_coco: when truthy, class ids are passed through ``classes_mapping``.
+
+    Returns:
+        Nested list of detections, or an empty list when nothing was detected.
+    """
+    classes, agnostic_nms = None, False#
+    img_h = im0_shape[0]
+    vanish_y2 = vanish_point * float(img_h)
+    # Apply NMS
+    pred = non_max_suppression(pred, conf_thres, iou_thres, top_k_num, classes=classes, agnostic=agnostic_nms)
+    #return pred
+    dets = []
+    for i, det in enumerate(pred):  # detections per image
+        gn = torch.tensor(im0_shape)[[1, 0, 1, 0]]  # normalization gain whwh (not used below)
+        if det is not None and len(det):
+            # Rescale boxes from img_size to im0 size
+            det[:, :4] = scale_coords(img_shape[2:], det[:, :4], im0_shape).round()
+            det = det[det[:,3]>=vanish_y2]  # keep boxes whose bottom edge is at or below the vanish line
+            # (x1,y1,x2,y2) -> (x1,y1,w,h) for public_field.py
+            det[:, 2] = det[:, 2] - det[:, 0] 
+            det[:, 3] = det[:, 3] - det[:, 1]  
+            # classes(0~79) -> classes(1~80) for public_field.py
+            if e2e_coco:
+                classes_mapping(det, num_classes)
+            det = det.cpu().numpy()  
+            dets.append(det)                                
+    
+    if dets and len(dets) > 0:
+        # NOTE(review): squeezing axis 0 only works when exactly one image
+        # contributed detections; a batch with several images would fail here.
+        dets = np.asarray(dets)
+        dets = np.squeeze(dets, axis=0) # remove outer []
+        dets = dets.tolist()
+
+    return dets
+
+def make_grid(nx=20, ny=20):
+    yv, xv = torch.meshgrid([torch.arange(ny), torch.arange(nx)])
+    grids = torch.stack((xv, yv), 2).view((1, 1, ny, nx, 2)).float()
+    return grids
+
+def Yolov5_postprocess_onnx_sig(out,img_shape, im0_shape, conf_thres, iou_thres, top_k_num, grids, num_classes, anchors,vanish_point, e2e_coco) :
+    """Decode raw ONNX head outputs into boxes and pass them to ``Yolov5_postprocess``.
+
+    Args:
+        out: sequence of NumPy arrays, one per detection layer, each viewable
+            as (bs, na * no, ny, nx).
+        grids: per-layer grid offsets added to the xy predictions.
+        anchors: one flattened anchor list per detection layer.
+        img_shape, im0_shape, conf_thres, iou_thres, top_k_num, num_classes,
+        vanish_point, e2e_coco: forwarded to ``Yolov5_postprocess``.
+
+    Returns:
+        Whatever ``Yolov5_postprocess`` returns for the decoded predictions.
+    """
+    nc = num_classes  # number of classes
+    no = nc + 5  # number of outputs per anchor    
+    nl = len(anchors)  # number of detection layers
+    na = len(anchors[0]) // 2  # number of anchors
+    # NOTE(review): the views below hard-code 3 detection layers even though nl is computed above.
+    a = torch.tensor(anchors).float().view(3, -1, 2)
+    anchor_grid = a.clone().view(3, 1, -1, 1, 1, 2)
+    stride = torch.tensor([ 8., 16., 32.])     
+    z = []
+    for i in range(nl):
+        x = torch.from_numpy(out[i])
+        # print('x.shape',x.shape)
+        bs, _, ny, nx = x.shape  # x(bs,3,20,20,85)
+        x = x.view(bs, na, no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+        # grid_r = make_grid(nx, ny) ##grid
+        # grid_r = grid_r.numpy() ##grid
+        # file_name = str(i)+'.npy' ##grid
+        # np.save(file_name,grid_r) ##grid
+        grid = grids[i]#
+        # No sigmoid is applied here; presumably the model output is already
+        # sigmoid-activated - confirm against the exported model.
+        #y = x.sigmoid()
+        y = x
+        y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid) * stride[i]  # xy
+        y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid[i]  # wh
+        z.append(y.view(bs, -1, no))
+
+    pred = torch.cat(z, 1)
+    return Yolov5_postprocess(pred, img_shape, im0_shape, conf_thres, iou_thres, top_k_num, num_classes,vanish_point, e2e_coco)
+
+def Yolov5_postprocess_sig(out,img_shape, im0_shape, conf_thres, iou_thres, top_k_num, grids, num_classes, anchors,vanish_point, e2e_coco) :
+    """Decode raw PyTorch head outputs into boxes and pass them to ``Yolov5_postprocess``.
+
+    Args:
+        out: sequence of tensors, one per detection layer, each viewable as
+            (bs, na * no, ny, nx); helper tensors are moved to ``out[0].device``.
+        grids: per-layer grid offsets added to the xy predictions.
+        anchors: one flattened anchor list per detection layer.
+        img_shape, im0_shape, conf_thres, iou_thres, top_k_num, num_classes,
+        vanish_point, e2e_coco: forwarded to ``Yolov5_postprocess``.
+
+    Returns:
+        Whatever ``Yolov5_postprocess`` returns for the decoded predictions.
+    """
+    nc = num_classes  # number of classes
+    no = nc + 5  # number of outputs per anchor    
+    nl = len(anchors)  # number of detection layers
+    na = len(anchors[0]) // 2  # number of anchors
+    # NOTE(review): the views below hard-code 3 detection layers even though nl is computed above.
+    a = torch.tensor(anchors).float().view(3, -1, 2)
+    anchor_grid = a.clone().view(3, 1, -1, 1, 1, 2).to(out[0].device)
+    stride = torch.tensor([ 8., 16., 32.]).to(out[0].device)     
+    z = []
+    for i in range(nl):
+        x = out[i]
+        bs, _, ny, nx = x.shape  # x(bs,255,20,20) to x(bs,3,20,20,85)
+        # print('x.shape',x.shape)
+        x = x.view(bs, na, no, ny, nx).permute(0, 1, 3, 4, 2).contiguous()
+        
+        # grid_r = make_grid(nx, ny) ##grid
+        # grid_r = grid_r.numpy() ##grid
+        # file_name = str(i)+'.npy' ##grid
+        # np.save(file_name,grid_r) ##grid
+        
+        grid = grids[i].to(out[0].device) #
+        # No sigmoid is applied here; presumably the model output is already
+        # sigmoid-activated - confirm against the model definition.
+        #y = x.sigmoid()
+        y = x
+        y[..., 0:2] = (y[..., 0:2] * 2. - 0.5 + grid) * stride[i]  # xy
+        y[..., 2:4] = (y[..., 2:4] * 2) ** 2 * anchor_grid[i]  # wh
+        z.append(y.view(bs, -1, no))   
+    # exit(0)
+    pred = torch.cat(z, 1)
+    return Yolov5_postprocess(pred, img_shape, im0_shape, conf_thres, iou_thres, top_k_num, num_classes,vanish_point, e2e_coco)
--- a/kneron/exporting/yolov5/yolov5_preprocess.py
+++ b/kneron/exporting/yolov5/yolov5_preprocess.py
@ -0,0 +1,160 @@
+# coding: utf-8
+import torch
+import cv2
+import numpy as np
+import math
+import time
+from . import kneron_preprocessing
+kneron_preprocessing.API.set_default_as_520()
+torch.backends.cudnn.deterministic = True
+img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
+def make_divisible(x, divisor):
+    # Returns x evenly divisble by divisor
+    return math.ceil(x / divisor) * divisor
+
+def check_img_size(img_size, s=32):
+    # Verify img_size is a multiple of stride s
+    new_size = make_divisible(img_size, int(s))  # ceil gs-multiple
+    if new_size != img_size:
+        print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, new_size))
+    return new_size
+
+def letterbox_ori(img, new_shape=(640, 640), color=(0, 0, 0), auto=True, scaleFill=False, scaleup=True):
+    # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
+    shape = img.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better test mAP)
+        r = min(r, 1.0)
+
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) # width, height 
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+
+    dw /= 2  # divide padding into 2 sides
+    dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+        #img = kneron_preprocessing.API.resize(img,size=new_unpad, keep_ratio = False)
+
+    top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    # top, bottom = int(0), int(round(dh + 0.1))
+    # left, right = int(0), int(round(dw + 0.1))    
+    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+    #img = kneron_preprocessing.API.pad(img, left, right, top, bottom, 0)
+
+    return img, ratio, (dw, dh)
+
+def letterbox(img, new_shape=(640, 640), color=(0, 0, 0), auto=True, scaleFill=False, scaleup=True):
+    # Resize image to a 32-pixel-multiple rectangle https://github.com/ultralytics/yolov3/issues/232
+    shape = img.shape[:2]  # current shape [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Scale ratio (new / old)
+    r = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:  # only scale down, do not scale up (for better test mAP)
+        r = min(r, 1.0)
+
+    # Compute padding
+    ratio = r, r  # width, height ratios
+    new_unpad = int(round(shape[1] * r)), int(round(shape[0] * r)) # width, height 
+    dw, dh = new_shape[1] - new_unpad[0], new_shape[0] - new_unpad[1]  # wh padding
+
+    # dw /= 2  # divide padding into 2 sides
+    # dh /= 2
+
+    if shape[::-1] != new_unpad:  # resize
+        #img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+        img = kneron_preprocessing.API.resize(img,size=new_unpad, keep_ratio = False)
+
+    # top, bottom = int(round(dh - 0.1)), int(round(dh + 0.1))
+    # left, right = int(round(dw - 0.1)), int(round(dw + 0.1))
+    top, bottom = int(0), int(round(dh + 0.1))
+    left, right = int(0), int(round(dw + 0.1))    
+    #img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)  # add border
+    img = kneron_preprocessing.API.pad(img, left, right, top, bottom, 0)
+
+    return img, ratio, (dw, dh)
+
+def letterbox_test(img, new_shape=(640, 640), color=(0, 0, 0), auto=True, scaleFill=False, scaleup=True):
+
+    ratio = 1.0, 1.0
+    dw, dh = 0, 0
+    img = kneron_preprocessing.API.resize(img, size=(480, 256), keep_ratio=False, type='bilinear')
+    return img, ratio, (dw, dh)
+
+def LoadImages(path,img_size):  #_rgb # for inference
+    if isinstance(path, str):
+        img0 = cv2.imread(path)  # BGR       
+    else:
+        img0 = path  # BGR
+
+    # Padded resize
+    img = letterbox(img0, new_shape=img_size)[0]
+    # Convert
+    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+    img = np.ascontiguousarray(img)
+    return img, img0
+
+def LoadImages_yyy(path,img_size): #_yyy # for inference
+    if isinstance(path, str):
+        img0 = cv2.imread(path)  # BGR       
+    else:
+        img0 = path  # BGR
+
+    yvu = cv2.cvtColor(img0, cv2.COLOR_BGR2YCrCb)
+    y, v, u = cv2.split(yvu)
+    img0 = np.stack((y,)*3, axis=-1)
+
+    # Padded resize
+    img = letterbox(img0, new_shape=img_size)[0]
+
+    # Convert
+    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+    img = np.ascontiguousarray(img)
+    return img, img0
+
+def LoadImages_yuv420(path,img_size):  #_yuv420 # for inference 
+    if isinstance(path, str):
+        img0 = cv2.imread(path)  # BGR       
+    else:
+        img0 = path  # BGR
+    img_h, img_w = img0.shape[:2]
+    img_h = (img_h // 2) * 2
+    img_w = (img_w // 2) * 2
+    img = img0[:img_h,:img_w,:]
+    yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV_I420)
+    img0= cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR_I420) #yuv420
+
+    
+    # Padded resize
+    img = letterbox(img0, new_shape=img_size)[0]
+
+    # Convert
+    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, to 3x416x416
+    img = np.ascontiguousarray(img)
+    return img, img0
+
+def Yolov5_preprocess(image_path, device, imgsz_h, imgsz_w) : 
+    model_stride_max = 32
+    imgsz_h = check_img_size(imgsz_h, s=model_stride_max)  # check img_size
+    imgsz_w = check_img_size(imgsz_w, s=model_stride_max)  # check img_size
+    img, im0 = LoadImages(image_path, img_size=(imgsz_h,imgsz_w))
+    img = kneron_preprocessing.API.norm(img) #path1
+    #print('img',img.shape)
+    img = torch.from_numpy(img).to(device) #path1,path2
+    # img = img.float()  # uint8 to fp16/32 #path2
+    # img /= 255.0#256.0 - 0.5 # 0 - 255 to -0.5 - 0.5 #path2
+    
+    if img.ndimension() == 3:
+        img = img.unsqueeze(0)
+    
+    return img, im0
+
--- a/kneron/exporting/yolov5/yolov5_runner.py
+++ b/kneron/exporting/yolov5/yolov5_runner.py
@ -0,0 +1,91 @@
+import torch
+torch.set_printoptions(precision=10)
+torch.set_printoptions(threshold=99999999999)
+torch.backends.cudnn.deterministic = True
+from .yolov5_preprocess import *
+from .yolov5_postprocess import *
+from .yolo_v2 import Model as Model_v2
+import onnxruntime
+import time
+import os
+from collections import Counter
+import torch.nn.functional as F
+import random
+from pathlib import Path
+
+class Yolov5Runner:
+    def __init__(self, model_path, yaml_path, grid20_path, grid40_path, grid80_path, num_classes, imgsz_h, imgsz_w, conf_thres, iou_thres, top_k_num, vanish_point, **kwargs):#is_onnx,
+        """
+        inputs :
+            model_path : str ,path to model 
+        """
+        self.model_path = model_path
+        self.imgsz_h = imgsz_h
+        self.imgsz_w = imgsz_w
+        self.conf_thres = conf_thres
+        self.iou_thres = iou_thres
+        self.top_k_num = top_k_num
+        self.vanish_point = vanish_point
+        self.num_classes = num_classes
+        self.DEVICE = torch.device("cpu")#torch.device('cuda:0')#
+        self.grid20 = torch.from_numpy(np.load(grid20_path))
+        self.grid40 = torch.from_numpy(np.load(grid40_path))
+        self.grid80 = torch.from_numpy(np.load(grid80_path)) 
+        self.grids = [self.grid80, self.grid40, self.grid20]        
+        if 'onnx' not in model_path:
+            self.yolov5_model = Model_v2(yaml_path, nc=num_classes)                    
+            self.yolov5_model.load_state_dict(torch.load(model_path, map_location=self.DEVICE))#,strict=False)
+            self.yolov5_model.float().eval()
+            self.yolov5_model.to(self.DEVICE)
+            self.yolov5_model.eval()
+        else:
+            #onnxruntime.set_default_logger_severity(0)
+            self.sess = onnxruntime.InferenceSession(model_path)
+            # self.sess.set_providers(['CUDAExecutionProvider'])
+            self.input_name = self.sess.get_inputs()[0].name
+            self.onnx_batch_size = self.sess.get_inputs()[0].shape[0]
+            self.onnx_img_size_h = self.sess.get_inputs()[0].shape[2]
+            self.onnx_img_size_w = self.sess.get_inputs()[0].shape[3]  
+            
+        self.anchors = [[10, 13, 16, 30, 33, 23], [30, 61, 62, 45, 59, 119], [116, 90, 156, 198, 373, 326]] #yolov5            
+        print('self.vanish_point',self.vanish_point)                                                               
+        self.e2e_coco = kwargs.get('e2e_coco', False)
+ 
+    def run(self, img_path):
+        """
+        inputs : 
+            img_path : path of the image
+        outputs :
+            dets : list 
+        """
+        self.yolov5_model.eval()
+        with torch.no_grad():
+            img, im0 = Yolov5_preprocess(img_path, self.DEVICE, self.imgsz_h, self.imgsz_w)
+            if next(self.yolov5_model.parameters()).is_cuda:
+                img = img.type(torch.cuda.FloatTensor)
+            else:
+                img = img.type(torch.FloatTensor)
+            pred = self.yolov5_model(img, augment=False)
+            img_shape, im0_shape = img.shape, im0.shape               
+            dets = Yolov5_postprocess_sig(pred,img_shape, im0_shape, self.conf_thres, self.iou_thres, self.top_k_num, self.grids, self.num_classes, self.anchors,self.vanish_point)
+            return dets 
+
+    def run_onnx(self, img_path):
+        """
+        inputs : 
+            img_path : path of the image
+        outputs :
+            dets : list 
+        """
+        with torch.no_grad():
+            img, im0 = Yolov5_preprocess(img_path, self.DEVICE, self.imgsz_h, self.imgsz_w)
+            np_images = np.array(img.cpu())
+            np_images = np_images.astype(np.float32)
+            pred_onnx = self.sess.run(None, {self.input_name: np_images })
+            img_shape, im0_shape = img.shape, im0.shape 
+            # print('img_shape',img_shape)
+            # print('im0_shape', im0_shape)
+            dets_onnx = Yolov5_postprocess_onnx_sig(pred_onnx,img_shape, im0_shape, self.conf_thres, self.iou_thres, self.top_k_num, self.grids, self.num_classes, self.anchors,self.vanish_point, self.e2e_coco)
+            return dets_onnx
+
+
--- a/kneron/exporting/yolov5_export.py
+++ b/kneron/exporting/yolov5_export.py
@ -0,0 +1,80 @@
+import os
+import torch
+import sys
+import yaml
+import argparse
+
+from yolov5.yolov5_runner import Yolov5Runner
+
+def save_weight(num_classes): 
+    current_path=os.getcwd()
+    par_path = os.path.dirname(current_path)
+    sys.path.append(os.path.join(par_path, 'yolov5'))
+    from models.yolo import Model  
+    num_classes = num_classes 
+    device=torch.device('cpu')
+    ckpt = torch.load(path, map_location=device)
+    model = Model(yaml_path, nc=num_classes)
+    ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items() if k in model.state_dict() and model.state_dict()[k].shape == v.shape}
+    model.load_state_dict(ckpt['model'])
+    torch.save(model.state_dict(),pt_path,_use_new_zipfile_serialization=False)
+    
+def export_onnx(input_h, input_w, num_classes):
+
+    onnx_batch_size, onnx_img_h, onnx_img_w = 1, input_h, input_w
+    yolov5_model = Yolov5Runner(model_path=pt_path, yaml_path=yaml_path, grid20_path=grid20_path, grid40_path=grid40_path, grid80_path=grid80_path, num_classes=num_classes, imgsz_h=onnx_img_h, imgsz_w=onnx_img_w, conf_thres=0.001, iou_thres=0.65, top_k_num=3000, vanish_point=0.0) 
+    
+    # Input
+    img = torch.zeros((onnx_batch_size, 3, onnx_img_h, onnx_img_w))  
+    # img = img.type(torch.cuda.FloatTensor)
+
+    # Load PyTorch model
+    model = yolov5_model.yolov5_model
+    model.eval()
+    model.model[-1].export = True  # set Detect() layer export=True
+    y = model(img)  # dry run
+
+    # ONNX export
+    try:
+        import onnx
+        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+        print('****onnx file****',onnx_export_file)
+        torch.onnx.export(model, img, onnx_export_file, verbose=False, opset_version=11, keep_initializers_as_inputs=True, input_names=['images'], output_names=['classes', 'boxes'] if y is None else ['output'])
+        # Checks
+        onnx_model = onnx.load(onnx_export_file)  # load onnx model
+        onnx.checker.check_model(onnx_model)  # check onnx model
+        print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
+        print('ONNX export success, saved as %s' % onnx_export_file)
+    except Exception as e:
+        print('ONNX export failure: %s' % e)
+
+
+# Script entry point: read the paths/parameters YAML, then re-save the
+# checkpoint as a state dict and export it to ONNX.
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data', type=str, default='../yolov5/data/pretrained_paths_520.yaml', help='the path to pretrained model paths yaml file')
+
+    args = parser.parse_args()
+    
+    # NOTE(review): yaml.FullLoader is used on the --data file; only pass trusted YAML.
+    with open(args.data) as f:
+        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
+        
+    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+    # The names below are module-level globals; save_weight() and
+    # export_onnx() read several of them directly (path, pt_path, yaml_path,
+    # grid*_path, onnx_export_file), so they must keep these exact names.
+    num_classes = data_dict['nc']
+    input_w = data_dict['input_w']
+    input_h = data_dict['input_h']
+    grid_dir = data_dict['grid_dir']
+    grid20_path = data_dict['grid20_path']
+    grid40_path = data_dict['grid40_path']
+    grid80_path = data_dict['grid80_path']
+    path = data_dict['path']
+    pt_path=data_dict['pt_path']
+    yaml_path=data_dict['yaml_path']
+    onnx_export_file = data_dict['onnx_export_file']
+    # save_weight() writes pt_path, which export_onnx() then loads.
+    save_weight(num_classes)
+    export_onnx(input_h, input_w, num_classes)
+
+
+
+
+
+
--- a/kneron/inference.py
+++ b/kneron/inference.py
@ -0,0 +1,64 @@
+import os
+import sys
+import argparse
+import yaml
+import cv2
+import numpy as np
+
+def draw(img_path, bboxes, save_path = None, names = None):
+    
+    img = cv2.imread(img_path)
+    for bbox in bboxes:
+        l,t,w,h,score,class_id=bbox
+        if names is not None:
+            class_id = names[int(class_id)]
+        img = cv2.rectangle(img,(int(l),int(t)),(int(l+w),int(t+h)),(0, 255, 0),6)
+        text = "{}".format(class_id) + "  {}".format(np.round(score, 3))
+        img = cv2.putText(img, text, (int(l), int(t)-10), cv2.FONT_HERSHEY_SIMPLEX, 0.5, (0, 255, 0), 2)
+    if save_path is None:
+        save_path = img_path
+    output_file = os.path.join(save_path, "output.jpg")  # 確保有圖片副檔名
+    cv2.imwrite(output_file, img)
+
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--img-path', type=str, default=None, help='path to image')
+    parser.add_argument('--save-path', type=str, default=None, help='path to save image')
+    parser.add_argument('--data', type=str, default='data/pretrained_paths_520.yaml', help='the path to pretrained model paths yaml file')
+    parser.add_argument('--conf_thres', type=float, default=0.3, help='confidence threshold')
+    parser.add_argument('--iou_thres', type=float, default=0.5, help='iou threshold for NMS')
+    parser.add_argument('--onnx', help='inference onnx model',action='store_true')
+    
+    args = parser.parse_args()
+    
+    par_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
+    sys.path.append(par_path)
+    sys.path.append(os.path.join(par_path, 'exporting') )
+
+    from yolov5.yolov5_runner import Yolov5Runner
+
+    with open(args.data) as f:
+        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
+        
+    num_classes = data_dict['nc']
+    input_w = data_dict['input_w']
+    input_h = data_dict['input_h']
+    grid_dir = data_dict['grid_dir']
+    grid20_path = data_dict['grid20_path']
+    grid40_path = data_dict['grid40_path']
+    grid80_path = data_dict['grid80_path']
+    path = data_dict['path']
+    
+    
+    if args.onnx:
+        yolov5_model = Yolov5Runner(model_path=data_dict['onnx_export_file'], yaml_path=data_dict['yaml_path'], grid20_path=grid20_path, grid40_path=grid40_path, grid80_path=grid80_path, num_classes=num_classes, imgsz_h=input_h, imgsz_w=input_w, conf_thres=args.conf_thres, iou_thres=args.iou_thres, top_k_num=3000, vanish_point=0.0) 
+        bboxes = yolov5_model.run_onnx(args.img_path)
+    else:
+        yolov5_model = Yolov5Runner(model_path=data_dict['pt_path'], yaml_path=data_dict['yaml_path'], grid20_path=grid20_path, grid40_path=grid40_path, grid80_path=grid80_path, num_classes=num_classes, imgsz_h=input_h, imgsz_w=input_w, conf_thres=args.conf_thres, iou_thres=args.iou_thres, top_k_num=3000, vanish_point=0.0) 
+        bboxes = yolov5_model.run(args.img_path)
+    
+    print(bboxes)
+    
+    if args.save_path is not None:
+        draw(args.img_path, bboxes, save_path = args.save_path, names = data_dict['names'])
--- a/kneron/inference_e2e.py
+++ b/kneron/inference_e2e.py
@ -0,0 +1,53 @@
+import os
+import sys
+import argparse
+import yaml
+from tqdm import tqdm
+import json
+
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--img-path', type=str, help='Path to the dataset directory.')
+    parser.add_argument('--params', type=str, help='Path to the init params file.')
+    parser.add_argument('--save-path', type=str, help='Path to save output in json.')
+    
+    args = parser.parse_args()
+    
+    par_path = os.path.abspath(os.path.join(os.getcwd(), os.pardir))
+    sys.path.append(par_path)
+    sys.path.append(os.path.join(par_path, 'exporting') )
+
+    from yolov5.yolov5_runner import Yolov5Runner
+
+    with open(args.params, "r", encoding="utf-8") as f:
+        params_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
+        
+    num_classes = params_dict['nc']
+    input_w = params_dict['input_w']
+    input_h = params_dict['input_h']
+    grid20_path = params_dict['grid20_path']
+    grid40_path = params_dict['grid40_path']
+    grid80_path = params_dict['grid80_path']
+    conf_thres = params_dict['conf_thres']
+    iou_thres = params_dict['iou_thres']
+    model_type = params_dict['model_type']
+    e2e_coco = params_dict['e2e_coco']
+
+    if model_type == 'onnx':
+        yolov5_model = Yolov5Runner(model_path=params_dict['onnx_path'], yaml_path=params_dict['model_yaml_path'], grid20_path=grid20_path, grid40_path=grid40_path, grid80_path=grid80_path, num_classes=num_classes, imgsz_h=input_h, imgsz_w=input_w, conf_thres=conf_thres, iou_thres=iou_thres, top_k_num=3000, vanish_point=0.0, e2e_coco=e2e_coco)
+    else:
+        yolov5_model = Yolov5Runner(model_path=params_dict['pt_path'], yaml_path=params_dict['model_yaml_path'], grid20_path=grid20_path, grid40_path=grid40_path, grid80_path=grid80_path, num_classes=num_classes, imgsz_h=input_h, imgsz_w=input_w, conf_thres=conf_thres, iou_thres=iou_thres, top_k_num=3000, vanish_point=0.0, e2e_coco=e2e_coco)
+
+    img_list = os.listdir(args.img_path)
+    results = []
+    for img_name in tqdm(img_list):
+        if img_name.split('.')[-1] not in ['png', 'jpg']:
+            continue
+        img_path = os.path.join(args.img_path, img_name)
+        if model_type == 'onnx':
+            bboxes = yolov5_model.run_onnx(img_path)
+        else:
+            bboxes = yolov5_model.run(img_path)
+        results.append({'img_path': img_path, 'bbox': bboxes } )
+    with open(args.save_path, 'w') as fp:
+        json.dump(results, fp)
--- a/kneron/ktc720.py
+++ b/kneron/ktc720.py
@ -0,0 +1,72 @@
+import ktc
+import numpy as np
+import os
+import onnx
+from PIL import Image
+import torch
+from yolov5_preprocess import Yolov5_preprocess
+import kneron_preprocessing
+
+# Pipeline: optimise the ONNX model, evaluate it, run fixed-point analysis on
+# preprocessed images from data_path, then compile to NEF.
+onnx_path = 'runs/train/exp24/weights/best_simplified.onnx'
+m = onnx.load(onnx_path)
+m = ktc.onnx_optimizer.onnx2onnx_flow(m)
+# The optimised model is written to the current working directory.
+onnx.save(m,'latest.opt.onnx')
+# NOTE(review): the positional arguments presumably are model id, version and
+# target platform ("720") — confirm against the ktc documentation.
+km = ktc.ModelConfig(20008, "0001", "720", onnx_model=m)
+eval_result = km.evaluate()
+print("\nNpu performance evaluation result:\n" + str(eval_result))
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+imgsz_h, imgsz_w = 640, 640
+
+data_path = "data50"
+# Collect image file names under data_path, used only for the existence check and the count below.
+files_found = [f for _, _, files in os.walk(data_path) for f in files if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))]
+
+if not files_found:
+    raise FileNotFoundError(f"❌ Error: No images found in {data_path}! Please check your dataset.")
+
+print(f"✅ Found {len(files_found)} images in {data_path}")
+
+# Get the ONNX model's input name
+input_name = m.graph.input[0].name  # used as the key of the dict passed to km.analysis
+# Holds the preprocessed image arrays
+img_list = []
+
+# Walk data_path and preprocess every image
+for root, _, files in os.walk(data_path):
+    for f in files:
+        fullpath = os.path.join(root, f)
+
+        # Only handle image files
+        if not f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
+            print(f"⚠️ Skipping non-image file: {fullpath}")
+            continue
+
+        # Best effort: a file that fails to preprocess is reported and skipped
+        try:
+            img_data, _ = Yolov5_preprocess(fullpath, device, imgsz_h, imgsz_w)
+            img_data = img_data.cpu().numpy()
+            print(f"✅ Processed: {fullpath}")
+            img_list.append(img_data)
+        except Exception as e:
+            print(f"❌ Failed to process {fullpath}: {e}")
+
+# Make sure at least one image was preprocessed
+if not img_list:
+    raise ValueError("❌ Error: No valid images were processed! Please check the image paths and formats.")
+
+# Run the fixed-point (BIE) analysis
+bie_model_path = km.analysis({input_name: img_list})
+
+# Check that the BIE model file exists
+if not os.path.exists(bie_model_path):
+    raise RuntimeError(f"❌ Error: BIE model was not generated! Please check your quantization process.")
+
+# Report success
+print("\n✅ Fixed-point analysis done! BIE model saved to:", bie_model_path)
+
+# Compile the analysed model configuration into a NEF file
+nef_model_path = ktc.compile([km])
+
+# Report success
+print("\n✅ Compile done! NEF file saved to:", nef_model_path)
--- a/kneron/oldquantize_yolov5.py
+++ b/kneron/oldquantize_yolov5.py
@ -0,0 +1,33 @@
+import os
+import numpy as np
+import torch
+from yolov5_preprocess import Yolov5_preprocess  # 使用你的預處理
+import kneron_preprocessing
+
+# Select the device
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Image size (meant to match the size used for training)
+imgsz_h, imgsz_w = 640, 640
+
+# Directory holding the quantization images (must exist).
+# NOTE(review): this is an absolute path ("/data50"), unlike the relative
+# "data50" used by the other scripts — confirm it is intended.
+data_path = "/data50"
+img_list = []
+
+# Walk data_path and preprocess every file found (no extension filter is applied)
+for root, _, files in os.walk(data_path):
+    for f in files:
+        fullpath = os.path.join(root, f)
+        
+        # Apply the shared YOLOv5 preprocessing
+        img_data, _ = Yolov5_preprocess(fullpath, device, imgsz_h, imgsz_w)
+
+        print(f"Processed: {fullpath}")
+        img_list.append(img_data)
+
+# Convert to a NumPy array
+img_list = np.array(img_list)
+
+# Run the fixed-point (BIE) analysis.
+# NOTE(review): `km` is never defined or imported in this file, so this line
+# raises NameError as written; the script appears to expect a ktc.ModelConfig
+# created elsewhere — confirm before use.
+bie_model_path = km.analysis({"input": img_list})
+print("\nFixed-point analysis done. Saved bie model to '" + str(bie_model_path) + "'")
--- a/kneron/onnx2nef520.py
+++ b/kneron/onnx2nef520.py
@ -0,0 +1,110 @@
+import ktc
+import numpy as np
+import os
+import onnx
+import shutil
+from PIL import Image
+import torch
+from yolov5_preprocess import Yolov5_preprocess
+import kneron_preprocessing
+
+# Directory and file name of the ONNX model to convert
+onnx_dir = 'runs/train/exp24/weights/'
+onnx_path = os.path.join(onnx_dir, 'best_no_sigmoid.onnx')
+
+# Make sure the target directory exists
+os.makedirs(onnx_dir, exist_ok=True)
+
+# Load and optimise the ONNX model, then save the optimised copy next to it
+m = onnx.load(onnx_path)
+m = ktc.onnx_optimizer.onnx2onnx_flow(m)
+opt_onnx_path = os.path.join(onnx_dir, 'latest.opt.onnx')
+onnx.save(m, opt_onnx_path)
+
+# NOTE(review): the positional arguments presumably are model id, version and
+# target platform ("520") — confirm against the ktc documentation.
+km = ktc.ModelConfig(20008, "0001", "520", onnx_model=m)
+eval_result = km.evaluate()
+print("\nNpu performance evaluation result:\n" + str(eval_result))
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+imgsz_h, imgsz_w = 640, 640
+
+data_path = "datacoin"
+# Collect image file names under data_path, used only for the existence check and the count below.
+files_found = [f for _, _, files in os.walk(data_path) for f in files if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))]
+
+if not files_found:
+    raise FileNotFoundError(f"\u274c Error: No images found in {data_path}! Please check your dataset.")
+
+print(f"\u2705 Found {len(files_found)} images in {data_path}")
+
+# Get the ONNX model's input name
+input_name = m.graph.input[0].name  # used as the key of the dict passed to km.analysis
+
+# Holds the preprocessed image arrays
+img_list = []
+
+# Walk data_path and preprocess every image
+for root, _, files in os.walk(data_path):
+    for f in files:
+        fullpath = os.path.join(root, f)
+
+        # Only handle image files
+        if not f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
+            print(f"\u26a0\ufe0f Skipping non-image file: {fullpath}")
+            continue
+
+        # Best effort: a file that fails to preprocess is reported and skipped
+        try:
+            img_data, _ = Yolov5_preprocess(fullpath, device, imgsz_h, imgsz_w)
+            img_data = img_data.cpu().numpy()
+            print(f"\u2705 Processed: {fullpath}")
+            img_list.append(img_data)
+        except Exception as e:
+            print(f"\u274c Failed to process {fullpath}: {e}")
+
+# Make sure at least one image was preprocessed
+if not img_list:
+    raise ValueError("\u274c Error: No valid images were processed! Please check the image paths and formats.")
+
+# Run the fixed-point (BIE) analysis
+bie_model_path = km.analysis({input_name: img_list})
+
+# Keep a copy of the BIE file in onnx_dir
+bie_save_path = os.path.join(onnx_dir, os.path.basename(bie_model_path))
+shutil.copy(bie_model_path, bie_save_path)  # copies; the file ktc produced is left in place
+
+# Check that the copied BIE file exists
+if not os.path.exists(bie_save_path):
+    raise RuntimeError(f"\u274c Error: BIE model was not generated! Please check your quantization process.")
+
+print("\n\u2705 Fixed-point analysis done! BIE model saved to:", bie_save_path)
+
+# 確保 `km` 已經初始化，並且 `.bie` 模型已生成
+nef_model_path = ktc.compile([km])
+
+# 確保 nef_model_path 不是 None 或空值
+if not nef_model_path:
+    raise RuntimeError("❌ Error: ktc.compile() did not return a valid .nef file path!")
+
+# 確保 NEF 目標資料夾存在
+os.makedirs(onnx_dir, exist_ok=True)
+
+# 確保 nef_model_path 不是 None 或空值
+if not nef_model_path:
+    raise RuntimeError("❌ Error: ktc.compile() did not return a valid .nef file path!")
+
+# 確保 .nef 檔案存在
+if not os.path.exists(nef_model_path):
+    raise RuntimeError(f"❌ Error: NEF model was not generated at {nef_model_path}! Please check your compilation process.")
+
+# 確保 NEF 檔案儲存到指定目錄
+nef_save_path = os.path.join(onnx_dir, os.path.basename(nef_model_path))
+if os.path.exists(nef_model_path):
+    shutil.copy(nef_model_path, nef_save_path)
+else:
+    raise RuntimeError(f"❌ Error: NEF model was expected at {nef_model_path}, but it does not exist!")  # 同樣使用 shutil.move
+
+if not os.path.exists(nef_save_path):
+    raise RuntimeError(f"\u274c Error: NEF model was not generated! Please check your compilation process.")
+
+print("\n\u2705 Compile done! NEF file saved to:", nef_save_path)
--- a/kneron/onnx2nef630.py
+++ b/kneron/onnx2nef630.py
@ -0,0 +1,110 @@
+import ktc
+import numpy as np
+import os
+import onnx
+import shutil
+from PIL import Image
+import torch
+from yolov5_preprocess import Yolov5_preprocess
+import kneron_preprocessing
+
+# Directory and file name of the ONNX model to convert
+onnx_dir = 'runs/train/exp29/weights/'
+onnx_path = os.path.join(onnx_dir, 'best_simplified.onnx')
+
+# Make sure the target directory exists
+os.makedirs(onnx_dir, exist_ok=True)
+
+# Load and optimise the ONNX model, then save the optimised copy next to it
+m = onnx.load(onnx_path)
+m = ktc.onnx_optimizer.onnx2onnx_flow(m)
+opt_onnx_path = os.path.join(onnx_dir, 'latest.opt.onnx')
+onnx.save(m, opt_onnx_path)
+
+# NOTE(review): the positional arguments presumably are model id, version and
+# target platform ("630") — confirm against the ktc documentation.
+km = ktc.ModelConfig(20008, "0001", "630", onnx_model=m)
+eval_result = km.evaluate()
+print("\nNpu performance evaluation result:\n" + str(eval_result))
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+imgsz_h, imgsz_w = 640, 640
+
+data_path = "data4"
+# Collect image file names under data_path, used only for the existence check and the count below.
+files_found = [f for _, _, files in os.walk(data_path) for f in files if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))]
+
+if not files_found:
+    raise FileNotFoundError(f"\u274c Error: No images found in {data_path}! Please check your dataset.")
+
+print(f"\u2705 Found {len(files_found)} images in {data_path}")
+
+# Get the ONNX model's input name
+input_name = m.graph.input[0].name  # used as the key of the dict passed to km.analysis
+
+# Holds the preprocessed image arrays
+img_list = []
+
+# Walk data_path and preprocess every image
+for root, _, files in os.walk(data_path):
+    for f in files:
+        fullpath = os.path.join(root, f)
+
+        # Only handle image files
+        if not f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
+            print(f"\u26a0\ufe0f Skipping non-image file: {fullpath}")
+            continue
+
+        # Best effort: a file that fails to preprocess is reported and skipped
+        try:
+            img_data, _ = Yolov5_preprocess(fullpath, device, imgsz_h, imgsz_w)
+            img_data = img_data.cpu().numpy()
+            print(f"\u2705 Processed: {fullpath}")
+            img_list.append(img_data)
+        except Exception as e:
+            print(f"\u274c Failed to process {fullpath}: {e}")
+
+# Make sure at least one image was preprocessed
+if not img_list:
+    raise ValueError("\u274c Error: No valid images were processed! Please check the image paths and formats.")
+
+# Run the fixed-point (BIE) analysis
+bie_model_path = km.analysis({input_name: img_list})
+
+# Keep a copy of the BIE file in onnx_dir
+bie_save_path = os.path.join(onnx_dir, os.path.basename(bie_model_path))
+shutil.copy(bie_model_path, bie_save_path)  # copies; the file ktc produced is left in place
+
+# Check that the copied BIE file exists
+if not os.path.exists(bie_save_path):
+    raise RuntimeError(f"\u274c Error: BIE model was not generated! Please check your quantization process.")
+
+print("\n\u2705 Fixed-point analysis done! BIE model saved to:", bie_save_path)
+
+# 確保 `km` 已經初始化，並且 `.bie` 模型已生成
+nef_model_path = ktc.compile([km])
+
+# 確保 nef_model_path 不是 None 或空值
+if not nef_model_path:
+    raise RuntimeError("❌ Error: ktc.compile() did not return a valid .nef file path!")
+
+# 確保 NEF 目標資料夾存在
+os.makedirs(onnx_dir, exist_ok=True)
+
+# 確保 nef_model_path 不是 None 或空值
+if not nef_model_path:
+    raise RuntimeError("❌ Error: ktc.compile() did not return a valid .nef file path!")
+
+# 確保 .nef 檔案存在
+if not os.path.exists(nef_model_path):
+    raise RuntimeError(f"❌ Error: NEF model was not generated at {nef_model_path}! Please check your compilation process.")
+
+# 確保 NEF 檔案儲存到指定目錄
+nef_save_path = os.path.join(onnx_dir, os.path.basename(nef_model_path))
+if os.path.exists(nef_model_path):
+    shutil.copy(nef_model_path, nef_save_path)
+else:
+    raise RuntimeError(f"❌ Error: NEF model was expected at {nef_model_path}, but it does not exist!")  # 同樣使用 shutil.move
+
+if not os.path.exists(nef_save_path):
+    raise RuntimeError(f"\u274c Error: NEF model was not generated! Please check your compilation process.")
+
+print("\n\u2705 Compile done! NEF file saved to:", nef_save_path)
--- a/kneron/onnx2nef720.py
+++ b/kneron/onnx2nef720.py
@ -0,0 +1,110 @@
+import ktc
+import numpy as np
+import os
+import onnx
+import shutil
+from PIL import Image
+import torch
+from yolov5_preprocess import Yolov5_preprocess
+import kneron_preprocessing
+
+# Directory and file name of the ONNX model to convert
+onnx_dir = 'runs/train/exp73/weights/'
+onnx_path = os.path.join(onnx_dir, 'best_simplified.onnx')
+
+# Make sure the target directory exists
+os.makedirs(onnx_dir, exist_ok=True)
+
+# Load and optimise the ONNX model, then save the optimised copy next to it
+m = onnx.load(onnx_path)
+m = ktc.onnx_optimizer.onnx2onnx_flow(m)
+opt_onnx_path = os.path.join(onnx_dir, 'latest.opt.onnx')
+onnx.save(m, opt_onnx_path)
+
+# NOTE(review): the positional arguments presumably are model id, version and
+# target platform ("720") — confirm against the ktc documentation.
+km = ktc.ModelConfig(20008, "0001", "720", onnx_model=m)
+eval_result = km.evaluate()
+print("\nNpu performance evaluation result:\n" + str(eval_result))
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+imgsz_h, imgsz_w = 640, 640
+
+data_path = "data50"
+# Collect image file names under data_path, used only for the existence check and the count below.
+files_found = [f for _, _, files in os.walk(data_path) for f in files if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))]
+
+if not files_found:
+    raise FileNotFoundError(f"\u274c Error: No images found in {data_path}! Please check your dataset.")
+
+print(f"\u2705 Found {len(files_found)} images in {data_path}")
+
+# Get the ONNX model's input name
+input_name = m.graph.input[0].name  # used as the key of the dict passed to km.analysis
+
+# Holds the preprocessed image arrays
+img_list = []
+
+# Walk data_path and preprocess every image
+for root, _, files in os.walk(data_path):
+    for f in files:
+        fullpath = os.path.join(root, f)
+
+        # Only handle image files
+        if not f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
+            print(f"\u26a0\ufe0f Skipping non-image file: {fullpath}")
+            continue
+
+        # Best effort: a file that fails to preprocess is reported and skipped
+        try:
+            img_data, _ = Yolov5_preprocess(fullpath, device, imgsz_h, imgsz_w)
+            img_data = img_data.cpu().numpy()
+            print(f"\u2705 Processed: {fullpath}")
+            img_list.append(img_data)
+        except Exception as e:
+            print(f"\u274c Failed to process {fullpath}: {e}")
+
+# Make sure at least one image was preprocessed
+if not img_list:
+    raise ValueError("\u274c Error: No valid images were processed! Please check the image paths and formats.")
+
+# Run the fixed-point (BIE) analysis
+bie_model_path = km.analysis({input_name: img_list})
+
+# Keep a copy of the BIE file in onnx_dir
+bie_save_path = os.path.join(onnx_dir, os.path.basename(bie_model_path))
+shutil.copy(bie_model_path, bie_save_path)  # copies; the file ktc produced is left in place
+
+# Check that the copied BIE file exists
+if not os.path.exists(bie_save_path):
+    raise RuntimeError(f"\u274c Error: BIE model was not generated! Please check your quantization process.")
+
+print("\n\u2705 Fixed-point analysis done! BIE model saved to:", bie_save_path)
+
+# 確保 `km` 已經初始化，並且 `.bie` 模型已生成
+nef_model_path = ktc.compile([km])
+
+# 確保 nef_model_path 不是 None 或空值
+if not nef_model_path:
+    raise RuntimeError("❌ Error: ktc.compile() did not return a valid .nef file path!")
+
+# 確保 NEF 目標資料夾存在
+os.makedirs(onnx_dir, exist_ok=True)
+
+# 確保 nef_model_path 不是 None 或空值
+if not nef_model_path:
+    raise RuntimeError("❌ Error: ktc.compile() did not return a valid .nef file path!")
+
+# 確保 .nef 檔案存在
+if not os.path.exists(nef_model_path):
+    raise RuntimeError(f"❌ Error: NEF model was not generated at {nef_model_path}! Please check your compilation process.")
+
+# 確保 NEF 檔案儲存到指定目錄
+nef_save_path = os.path.join(onnx_dir, os.path.basename(nef_model_path))
+if os.path.exists(nef_model_path):
+    shutil.copy(nef_model_path, nef_save_path)
+else:
+    raise RuntimeError(f"❌ Error: NEF model was expected at {nef_model_path}, but it does not exist!")  # 同樣使用 shutil.move
+
+if not os.path.exists(nef_save_path):
+    raise RuntimeError(f"\u274c Error: NEF model was not generated! Please check your compilation process.")
+
+print("\n\u2705 Compile done! NEF file saved to:", nef_save_path)
--- a/kneron/onnx2nefSTDC630.py
+++ b/kneron/onnx2nefSTDC630.py
@ -0,0 +1,110 @@
+import ktc
+import numpy as np
+import os
+import onnx
+import shutil
+from PIL import Image
+import torch
+from yolov5_preprocess import Yolov5_preprocess
+import kneron_preprocessing
+
+# Directory and file name of the ONNX model to convert
+onnx_dir = 'work_dirs/kn_stdc1_in1k-pre_512x1024_80k_cityscapes/'
+onnx_path = os.path.join(onnx_dir, 'latest.onnx')
+
+# Make sure the target directory exists
+os.makedirs(onnx_dir, exist_ok=True)
+
+# Load and optimise the ONNX model, then save the optimised copy next to it
+m = onnx.load(onnx_path)
+m = ktc.onnx_optimizer.onnx2onnx_flow(m)
+opt_onnx_path = os.path.join(onnx_dir, 'latest.opt.onnx')
+onnx.save(m, opt_onnx_path)
+
+# NOTE(review): the positional arguments presumably are model id, version and
+# target platform ("630") — confirm against the ktc documentation.
+km = ktc.ModelConfig(20008, "0001", "630", onnx_model=m)
+eval_result = km.evaluate()
+print("\nNpu performance evaluation result:\n" + str(eval_result))
+
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# NOTE(review): the model directory name mentions 512x1024, yet the images are
+# preprocessed at 640x640 with the YOLOv5 pipeline — confirm this matches the
+# model's expected input.
+imgsz_h, imgsz_w = 640, 640
+
+data_path = "data50"
+# Collect image file names under data_path, used only for the existence check and the count below.
+files_found = [f for _, _, files in os.walk(data_path) for f in files if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))]
+
+if not files_found:
+    raise FileNotFoundError(f"\u274c Error: No images found in {data_path}! Please check your dataset.")
+
+print(f"\u2705 Found {len(files_found)} images in {data_path}")
+
+# Get the ONNX model's input name
+input_name = m.graph.input[0].name  # used as the key of the dict passed to km.analysis
+
+# Holds the preprocessed image arrays
+img_list = []
+
+# Walk data_path and preprocess every image
+for root, _, files in os.walk(data_path):
+    for f in files:
+        fullpath = os.path.join(root, f)
+
+        # Only handle image files
+        if not f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
+            print(f"\u26a0\ufe0f Skipping non-image file: {fullpath}")
+            continue
+
+        # Best effort: a file that fails to preprocess is reported and skipped
+        try:
+            img_data, _ = Yolov5_preprocess(fullpath, device, imgsz_h, imgsz_w)
+            img_data = img_data.cpu().numpy()
+            print(f"\u2705 Processed: {fullpath}")
+            img_list.append(img_data)
+        except Exception as e:
+            print(f"\u274c Failed to process {fullpath}: {e}")
+
+# Make sure at least one image was preprocessed
+if not img_list:
+    raise ValueError("\u274c Error: No valid images were processed! Please check the image paths and formats.")
+
+# Run the fixed-point (BIE) analysis
+bie_model_path = km.analysis({input_name: img_list})
+
+# Keep a copy of the BIE file in onnx_dir
+bie_save_path = os.path.join(onnx_dir, os.path.basename(bie_model_path))
+shutil.copy(bie_model_path, bie_save_path)  # copies; the file ktc produced is left in place
+
+# Check that the copied BIE file exists
+if not os.path.exists(bie_save_path):
+    raise RuntimeError(f"\u274c Error: BIE model was not generated! Please check your quantization process.")
+
+print("\n\u2705 Fixed-point analysis done! BIE model saved to:", bie_save_path)
+
+# 確保 `km` 已經初始化，並且 `.bie` 模型已生成
+nef_model_path = ktc.compile([km])
+
+# 確保 nef_model_path 不是 None 或空值
+if not nef_model_path:
+    raise RuntimeError("❌ Error: ktc.compile() did not return a valid .nef file path!")
+
+# 確保 NEF 目標資料夾存在
+os.makedirs(onnx_dir, exist_ok=True)
+
+# 確保 nef_model_path 不是 None 或空值
+if not nef_model_path:
+    raise RuntimeError("❌ Error: ktc.compile() did not return a valid .nef file path!")
+
+# 確保 .nef 檔案存在
+if not os.path.exists(nef_model_path):
+    raise RuntimeError(f"❌ Error: NEF model was not generated at {nef_model_path}! Please check your compilation process.")
+
+# 確保 NEF 檔案儲存到指定目錄
+nef_save_path = os.path.join(onnx_dir, os.path.basename(nef_model_path))
+if os.path.exists(nef_model_path):
+    shutil.copy(nef_model_path, nef_save_path)
+else:
+    raise RuntimeError(f"❌ Error: NEF model was expected at {nef_model_path}, but it does not exist!")  # 同樣使用 shutil.move
+
+if not os.path.exists(nef_save_path):
+    raise RuntimeError(f"\u274c Error: NEF model was not generated! Please check your compilation process.")
+
+print("\n\u2705 Compile done! NEF file saved to:", nef_save_path)
--- a/kneron/preprocessing/API.py
+++ b/kneron/preprocessing/API.py
@ -0,0 +1,684 @@
+# -*- coding: utf-8 -*-
+
+import numpy as np
+import os
+from .funcs.utils import str2int, str2bool
+from . import Flow
+
+# Module-wide Flow instances shared by every helper in this file, one per
+# numerical type ('floating', '520', '720').
+flow = Flow()
+flow.set_numerical_type('floating')
+
+flow_520 = Flow()
+flow_520.set_numerical_type('520')
+
+flow_720 = Flow()
+flow_720.set_numerical_type('720')
+
+# Sentinel used in signatures: "take the value from `default` below".
+DEFAULT = None
+
+# Fallback settings, switched by set_default_as_520() / set_default_as_floating().
+default = {
+    'crop': {'align_w_to_4': False},
+    'resize': {'type': 'bilinear', 'calculate_ratio_using_CSim': False},
+}
+
+def set_default_as_520():
+    """
+    Switch the module-wide fallback settings to the 520 configuration.
+
+    After the call the `default` table holds:
+        crop.align_w_to_4 = True
+        resize.type = 'fixed_520'
+        resize.calculate_ratio_using_CSim = True
+
+    Only these three entries are touched; the table is updated in place.
+    """
+    default['crop'].update(align_w_to_4=True)
+    default['resize'].update(type='fixed_520', calculate_ratio_using_CSim=True)
+
+def set_default_as_floating():
+    """
+    Switch the module-wide fallback settings to the floating configuration.
+
+    After the call the `default` table holds:
+        crop.align_w_to_4 = False
+        resize.type = 'bilinear'
+        resize.calculate_ratio_using_CSim = False
+
+    Only these three entries are touched; the table is updated in place.
+    """
+    default['crop'].update(align_w_to_4=False)
+    default['resize'].update(type='bilinear', calculate_ratio_using_CSim=False)
+
+def print_info_on():
+    """
+    Turn information printing on for every module-level flow
+    (floating, 520 and 720).
+    """
+    # flow_720 is included so the three shared flows stay in sync.
+    for shared_flow in (flow, flow_520, flow_720):
+        shared_flow.set_print_info(True)
+
+def print_info_off():
+    """
+    Turn information printing off for every module-level flow
+    (floating, 520 and 720).
+    """
+    # flow_720 is included so the three shared flows stay in sync.
+    for shared_flow in (flow, flow_520, flow_720):
+        shared_flow.set_print_info(False)
+
+def load_image(image):
+    """
+    Load an image through the shared floating flow (non-raw input).
+
+    Args:
+        image: [np.array/str], an array or an image file path
+
+    Returns:
+        out: [np.array], documented as rgb888 format
+    """
+    return flow.load_image(image, is_raw=False)
+
+def load_bin(image, fmt=None, size=None):
+    """
+    Load a raw .bin image and convert it to rgb888.
+
+    Args:
+        image: [str], bin file path
+        fmt: [str], "rgb888" / "rgb565" / "nir" (the value is not validated here)
+        size: [tuple], (image_w, image_h)
+
+    Returns:
+        out: [np.array], rgb888 format
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.load_bin(image,'rgb565',(raw_w,raw_h))
+    """
+    assert isinstance(size, tuple)
+    assert isinstance(fmt, str)
+
+    width, height = size[0], size[1]
+    raw = flow.load_image(
+        image,
+        is_raw=True,
+        raw_img_type='bin',
+        raw_img_fmt=fmt,
+        img_in_width=width,
+        img_in_height=height,
+    )
+    # Configure the shared flow's colour stage, then run it on the raw data.
+    flow.set_color_conversion(source_format=fmt, out_format='rgb888')
+    converted, _ = flow.funcs['color'](raw)
+    return converted
+
+def load_hex(file, fmt=None, size=None):
+    """
+    Load a raw .hex image and convert it to rgb888.
+
+    Args:
+        file: [str], hex file path
+        fmt: [str], "rgb888" / "yuv444" / "ycbcr444" / "yuv422" / "ycbcr422" / "rgb565" (case-insensitive)
+        size: [tuple], (image_w, image_h)
+
+    Returns:
+        out: [np.array], rgb888 format
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.load_hex(image,'rgb565',(raw_w,raw_h))
+    """
+    supported = ('rgb888', "yuv444", "ycbcr444", "yuv422", "ycbcr422", "rgb565")
+    assert isinstance(size, tuple)
+    assert isinstance(fmt, str)
+    assert fmt.lower() in supported
+
+    width, height = size[0], size[1]
+    raw = flow.load_image(
+        file,
+        is_raw=True,
+        raw_img_type='hex',
+        raw_img_fmt=fmt,
+        img_in_width=width,
+        img_in_height=height,
+    )
+    # Configure the shared flow's colour stage, then run it on the raw data.
+    flow.set_color_conversion(source_format=fmt, out_format='rgb888')
+    converted, _ = flow.funcs['color'](raw)
+    return converted
+
+def dump_image(image, output=None, file_fmt='txt',image_fmt='rgb888',order=0):
+    """
+    Dump an image to a txt, bin or hex file through the shared floating flow.
+
+    Args:
+        image: [np.array/str], an array, or an image file path that is loaded first
+        output: [str], dump file path; when None nothing is written
+        file_fmt: [str], "bin" / "txt" / "hex", default is txt
+        image_fmt: [str], RGB888 / RGBA8888 / RGB565 / NIR / YUV444 / YCbCr444 / YUV422 / YCbCr422, default is RGB888
+        order: accepted but not used by this function
+
+    Examples:
+        >>> kneron_preprocessing.API.dump_image(image_data,out_path,file_fmt='bin')
+    """
+    if isinstance(image, str):
+        image = load_image(image)
+    assert isinstance(image, np.ndarray)
+
+    if output is not None:
+        flow.set_output_setting(is_dump=False, dump_format=file_fmt, image_format=image_fmt, output_file=output)
+        flow.dump_image(image)
+
+def convert(image, out_fmt = 'RGB888', source_fmt = 'RGB888'):
+    """
+    Colour-convert an image with the shared floating flow (simulation disabled).
+
+    Args:
+        image: [np.array], input
+        out_fmt: [str], "rgb888" / "rgba8888" / "rgb565" / "yuv" / "ycbcr" / "yuv422" / "ycbcr422"
+        source_fmt: [str], "rgb888" / "rgba8888" / "rgb565" / "yuv" / "ycbcr" / "yuv422" / "ycbcr422"
+
+    Returns:
+        out: [np.array]
+    """
+    flow.set_color_conversion(source_format=source_fmt, out_format=out_fmt, simulation=False)
+    converted, _ = flow.funcs['color'](image)
+    return converted
+
+def get_crop_range(box,align_w_to_4=DEFAULT, pad_square_to_4=False,rounding_type=0):
+    """
+    Compute the exact crop box the flow would use for the given settings.
+
+    The crop stage is run on a 1x1 dummy image; only the box it reports is returned.
+
+    Args:
+        box: [tuple], (x1, y1, x2, y2); None yields (0,0,0,0)
+        align_w_to_4: [bool], align the crop width to 4 or not; None takes default['crop']['align_w_to_4']
+        pad_square_to_4: [bool], pad to square(align 4) or not, default False
+        rounding_type: [int], 0-> x1,y1 take floor, x2,y2 take ceil; 1->all take rounding
+
+    Returns:
+        out: [tuple,4], (crop_x1, crop_y1, crop_x2, crop_y2)
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.get_crop_range((272,145,461,341), align_w_to_4=True, pad_square_to_4=True)
+        (272, 145, 460, 341)
+    """
+    if box is None:
+        return (0,0,0,0)
+
+    use_align = default['crop']['align_w_to_4'] if align_w_to_4 is None else align_w_to_4
+    x1, y1, x2, y2 = box[0], box[1], box[2], box[3]
+    flow.set_crop(type='specific', start_x=x1, start_y=y1, end_x=x2, end_y=y2, align_w_to_4=use_align, pad_square_to_4=pad_square_to_4, rounding_type=rounding_type)
+
+    dummy = np.zeros((1,1,3)).astype('uint8')
+    _, crop_info = flow.funcs['crop'](dummy)
+    return crop_info['box']
+
+def crop(image, box=None, align_w_to_4=DEFAULT, pad_square_to_4=False,rounding_type=0 ,info_out = None):
+    """
+    crop function
+
+    Crop the image to the range given by box.
+
+    Args:
+        image: [np.array], input
+        box: [tuple], (x1, y1, x2, y2); when None the image is returned unchanged
+        align_w_to_4: [bool], align the crop width to 4 or not; None takes default['crop']['align_w_to_4']
+        pad_square_to_4: [bool], pad to square(align 4) or not, default False
+        rounding_type: [int], 0-> x1,y1 take floor, x2,y2 take ceil; 1->all take rounding
+        info_out: [dict], optional; when given, the final crop box is stored in info_out['box']
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop(image_data,(272,145,461,341), align_w_to_4=True, info_out=info)
+        >>> info['box']
+        (272, 145, 460, 341)
+
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop(image_data,(272,145,461,341), pad_square_to_4=True, info_out=info)
+        >>> info['box']
+        (268, 145, 464, 341)
+    """
+    assert isinstance(image, np.ndarray)
+    if box is None:
+        return image
+    if align_w_to_4 is None:
+        align_w_to_4 = default['crop']['align_w_to_4']
+
+    flow.set_crop(type='specific', start_x=box[0],start_y=box[1],end_x=box[2],end_y=box[3], align_w_to_4=align_w_to_4, pad_square_to_4=pad_square_to_4,rounding_type=rounding_type)
+    image,info = flow.funcs['crop'](image)
+
+    # The default used to be a shared `{}` that every call wrote into; only a
+    # caller-supplied dict is filled now.
+    if info_out is not None:
+        info_out['box'] = info['box']
+    return image
+
+def crop_center(image, range=None, align_w_to_4=DEFAULT, pad_square_to_4=False,rounding_type=0 ,info_out = None):
+    """
+    crop function
+
+    Center crop by range.
+
+    Args:
+        image: [np.array], input
+        range: [tuple], (crop_w, crop_h); when None the image is returned unchanged
+        align_w_to_4: [bool], align the crop width to 4 or not; None takes default['crop']['align_w_to_4']
+        pad_square_to_4: [bool], pad to square(align 4) or not, default False
+        rounding_type: [int], 0-> x1,y1 take floor, x2,y2 take ceil; 1->all take rounding
+        info_out: [dict], optional; when given, the final crop box is stored in info_out['box']
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop_center(image_data,(102,40), align_w_to_4=True,info_out=info)
+        >>> info['box']
+        (268, 220, 372, 260)
+
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop_center(image_data,(102,40), pad_square_to_4=True, info_out=info)
+        >>> info['box']
+        (269, 192, 371, 294)
+    """
+    assert isinstance(image, np.ndarray)
+    if range is None:
+        return image
+    if align_w_to_4 is None:
+        align_w_to_4 = default['crop']['align_w_to_4']
+
+    flow.set_crop(type='center', crop_w=range[0],crop_h=range[1], align_w_to_4=align_w_to_4, pad_square_to_4=pad_square_to_4,rounding_type=rounding_type)
+    image,info = flow.funcs['crop'](image)
+
+    # The default used to be a shared `{}` that every call wrote into; only a
+    # caller-supplied dict is filled now.
+    if info_out is not None:
+        info_out['box'] = info['box']
+    return image
+
+def crop_corner(image, range=None, align_w_to_4=DEFAULT,pad_square_to_4=False,rounding_type=0 ,info_out = None):
+    """
+    crop function
+
+    Corner crop by range.
+
+    Args:
+        image: [np.array], input
+        range: [tuple], (crop_w, crop_h); when None the image is returned unchanged
+        align_w_to_4: [bool], align the crop width to 4 or not; None takes default['crop']['align_w_to_4']
+        pad_square_to_4: [bool], pad to square(align 4) or not, default False
+        rounding_type: [int], 0-> x1,y1 take floor, x2,y2 take ceil; 1->all take rounding
+        info_out: [dict], optional; when given, the final crop box is stored in info_out['box']
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop_corner(image_data,(102,40), align_w_to_4=True,info_out=info)
+        >>> info['box']
+        (0, 0, 104, 40)
+
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.crop_corner(image_data,(102,40), pad_square_to_4=True,info_out=info)
+        >>> info['box']
+        (0, -28, 102, 74)
+    """
+    assert isinstance(image, np.ndarray)
+    if range is None:
+        return image
+    if align_w_to_4 is None:
+        align_w_to_4 = default['crop']['align_w_to_4']
+
+    # rounding_type is forwarded like in crop() / crop_center(); it used to be
+    # accepted and documented here but silently dropped.
+    flow.set_crop(type='corner', crop_w=range[0],crop_h=range[1], align_w_to_4=align_w_to_4, pad_square_to_4=pad_square_to_4,rounding_type=rounding_type)
+    image, info = flow.funcs['crop'](image)
+
+    # The default used to be a shared `{}` that every call wrote into; only a
+    # caller-supplied dict is filled now.
+    if info_out is not None:
+        info_out['box'] = info['box']
+    return image
+
+def resize(image, size=None, keep_ratio = True, zoom = True, type=DEFAULT, calculate_ratio_using_CSim = DEFAULT, info_out = None):
+    """
+    resize function
+
+    resize type can be bilinear or bilicubic as floating type, fixed or fixed_520/fixed_720 as fixed type.
+    fixed_520/fixed_720 types add behaviour that simulates the 520/720 hardware bugs.
+
+    Args:
+        image: [np.array], input
+        size: [tuple], (input_w, input_h); when None the image is returned unchanged
+        keep_ratio: [bool], keep_ratio or not, default True
+        zoom: [bool], enable resize can zoom image or not, default True
+        type: [str], "bilinear" / "bilicubic" / "cv2" / "fixed" / "fixed_520" / "fixed_720"; None takes default['resize']['type']
+        calculate_ratio_using_CSim: [bool], calculate the ratio and scale using Csim function and C float; None takes default['resize']['calculate_ratio_using_CSim']
+        info_out: [dict], optional; when given, the final scale size (w,h) is stored in info_out['size']
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> info = {}
+        >>> image_data = kneron_preprocessing.API.resize(image_data,size=(56,56),type='fixed',info_out=info)
+        >>> info['size']
+        (54,56)
+    """
+    assert isinstance(image, np.ndarray)
+    if size is None:
+        return image
+    if type is None:
+        type = default['resize']['type']
+    if calculate_ratio_using_CSim is None:
+        calculate_ratio_using_CSim = default['resize']['calculate_ratio_using_CSim']
+
+    flow.set_resize(resize_w = size[0], resize_h = size[1], type=type, keep_ratio=keep_ratio,zoom=zoom, calculate_ratio_using_CSim=calculate_ratio_using_CSim)
+    image, info = flow.funcs['resize'](image)
+
+    # The default used to be a shared `{}` that every call wrote into; only a
+    # caller-supplied dict is filled now.
+    if info_out is not None:
+        info_out['size'] = info['size']
+
+    return image
+
+def pad(image, pad_l=0, pad_r=0, pad_t=0, pad_b=0, pad_val=0):
+    """
+    Pad the image by explicit left / right / top / bottom amounts.
+
+    Args:
+        image: [np.array], input
+        pad_l: [int], pad size from left, default 0
+        pad_r: [int], pad size from right, default 0
+        pad_t: [int], pad size from top, default 0
+        pad_b: [int], pad size from bottom, default 0
+        pad_val: [float], the value used for the padding, default 0
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.pad(image_data,20,40,20,40,-0.5)
+    """
+    assert isinstance(image, np.ndarray)
+
+    flow.set_padding(type='specific', pad_l=pad_l, pad_r=pad_r, pad_t=pad_t, pad_b=pad_b, pad_val=pad_val)
+    padded, _ = flow.funcs['padding'](image)
+    return padded
+
+def pad_center(image,size=None, pad_val=0):
+    """
+    Center-pad the image up to the requested size.
+
+    Args:
+        image: [np.array], input; must not be larger than `size`
+        size: [tuple], (padded_size_w, padded_size_h); when None the image is returned unchanged
+        pad_val: [float], the value used for the padding, default 0
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.pad_center(image_data,size=(56,56),pad_val=-0.5)
+    """
+    assert isinstance(image, np.ndarray)
+    if size is None:
+        return image
+    # shape is (rows, cols, ...) while size is (w, h), hence the crossed indices.
+    assert (image.shape[0] <= size[1]) & (image.shape[1] <= size[0])
+
+    target_w, target_h = size[0], size[1]
+    flow.set_padding(type='center', padded_w=target_w, padded_h=target_h, pad_val=pad_val)
+    padded, _ = flow.funcs['padding'](image)
+    return padded
+
+def pad_corner(image,size=None, pad_val=0):
+    """
+    Corner-pad the image up to the requested size.
+
+    Args:
+        image: [np.array], input; must not be larger than `size`
+        size: [tuple], (padded_size_w, padded_size_h); when None the image is returned unchanged
+        pad_val: [float], the value used for the padding, default 0
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.pad_corner(image_data,size=(56,56),pad_val=-0.5)
+    """
+    assert isinstance(image, np.ndarray)
+    if size is None:
+        return image
+    # shape is (rows, cols, ...) while size is (w, h), hence the crossed indices.
+    assert (image.shape[0] <= size[1]) & (image.shape[1] <= size[0])
+
+    target_w, target_h = size[0], size[1]
+    flow.set_padding(type='corner', padded_w=target_w, padded_h=target_h, pad_val=pad_val)
+    padded, _ = flow.funcs['padding'](image)
+    return padded
+
+def norm(image,scale=256.,bias=-0.5, mean=None, std=None):
+    """
+    Normalize the image with the shared floating flow (type='specific').
+
+    The arithmetic is done by the flow's 'normalize' function. The original
+    notes describe it as:
+        x = (x/scale - bias)
+        x[0,1,2] = x - mean[0,1,2]
+        x[0,1,2] = x / std[0,1,2]
+    NOTE(review): confirm the sign convention of `bias` against the normalize implementation.
+
+    Args:
+        image: [np.array], input
+        scale: [float], default = 256
+        bias: [float], default = -0.5
+        mean: [tuple,3], default = None
+        std: [tuple,3], default = None
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.norm(image_data)
+        >>> image_data = kneron_preprocessing.API.norm(image_data,mean=[0.485, 0.456, 0.406], std=[0.229, 0.224, 0.225])
+    """
+    assert isinstance(image, np.ndarray)
+
+    flow.set_normalize(type='specific', scale=scale, bias=bias, mean=mean, std=std)
+    normalized, _ = flow.funcs['normalize'](image)
+    return normalized
+
+def inproc_520(image,raw_fmt='rgb565',raw_size=None,npu_size=None, crop_box=None, pad_mode=0, norm='kneron', gray=False, rotate=0, radix=8, bit_width=8, round_w_to_16=True, NUM_BANK_LINE=32,BANK_ENTRY_CNT=512,MAX_IMG_PREPROC_ROW_NUM=511,MAX_IMG_PREPROC_COL_NUM=256):
+    """
+    inproc_520
+
+    Configure the shared 520 flow and run its whole preprocessing pipeline on one image.
+
+    Args:
+        image: [np.array / other], an np.array is treated as rgb888 data; any other value is handed to the flow as a raw 'bin' input
+        raw_fmt: [str], format of the raw input, default = 'rgb565' (only used for non-array input)
+        raw_size: [tuple], (raw_w, raw_h), required for non-array input
+        npu_size: [tuple], (npu_w, npu_h); when None the input is returned as-is
+        crop_box: [tuple], (x1, y1, x2, y2), if None will skip crop
+        pad_mode: [int], 0: pad 2 sides, 1: pad 1 side, 2: no pad. default = 0
+        norm: [str], default = 'kneron'
+        gray: [bool], default = False
+        rotate: [int], 0 / 1 / 2 ,default = 0
+        radix: [int], default = 8
+        bit_width: [int], default = 8
+        round_w_to_16: [bool], default = True
+        NUM_BANK_LINE, BANK_ENTRY_CNT, MAX_IMG_PREPROC_ROW_NUM, MAX_IMG_PREPROC_COL_NUM:
+            forwarded unchanged to flow_520.set_520_setting
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.inproc_520(image_data,npu_size=(56,56),crop_box=(272,145,460,341),pad_mode=1)
+    """
+    if (not isinstance(image, np.ndarray)):
+        flow_520.set_raw_img(is_raw_img='yes',raw_img_type = 'bin',raw_img_fmt=raw_fmt, img_in_width=raw_size[0], img_in_height=raw_size[1])
+    else:
+        flow_520.set_raw_img(is_raw_img='no')
+        flow_520.set_color_conversion(source_format='rgb888')
+
+    if npu_size is None:
+        return image
+
+    flow_520.set_model_size(w=npu_size[0],h=npu_size[1])
+
+    ## Crop
+    # Identity test: `!= None` is evaluated element-wise for array-like boxes
+    # and cannot be used as a condition.
+    if crop_box is not None:
+        flow_520.set_crop(start_x=crop_box[0],start_y=crop_box[1],end_x=crop_box[2],end_y=crop_box[3])
+        crop_fisrt = True
+    else:
+        crop_fisrt = False
+
+    ## Color
+    if gray:
+        flow_520.set_color_conversion(out_format='l',simulation='no')
+    else:
+        flow_520.set_color_conversion(out_format='rgb888',simulation='no')
+
+    ## Resize & Pad
+    pad_mode = str2int(pad_mode)
+    if (pad_mode == 0):
+        pad_type = 'center'
+        resize_keep_ratio = 'yes'
+    elif (pad_mode == 1):
+        pad_type = 'corner'
+        resize_keep_ratio = 'yes'
+    else:
+        # Any other mode: the aspect ratio is not kept when resizing.
+        pad_type = 'center'
+        resize_keep_ratio = 'no'
+
+    flow_520.set_resize(keep_ratio=resize_keep_ratio)
+    flow_520.set_padding(type=pad_type)
+
+    ## Norm
+    flow_520.set_normalize(type=norm)
+
+    ## 520 inproc
+    # `crop_fisrt` (sic) is the keyword name expected by set_520_setting.
+    flow_520.set_520_setting(radix=radix,bit_width=bit_width,rotate=rotate,crop_fisrt=crop_fisrt,round_w_to_16=round_w_to_16,NUM_BANK_LINE=NUM_BANK_LINE,BANK_ENTRY_CNT=BANK_ENTRY_CNT,MAX_IMG_PREPROC_ROW_NUM=MAX_IMG_PREPROC_ROW_NUM,MAX_IMG_PREPROC_COL_NUM=MAX_IMG_PREPROC_COL_NUM)
+    image_data, _ = flow_520.run_whole_process(image)
+
+    return image_data
+
+def inproc_720(image,raw_fmt='rgb565',raw_size=None,npu_size=None, crop_box=None, pad_mode=0, norm='kneron', gray=False):
+    """
+    inproc_720
+
+    Configure the shared 720 flow and run its whole preprocessing pipeline on one image.
+
+    Args:
+        image: [np.array / other], an np.array is treated as rgb888 data; any other value is handed to the flow as a raw 'bin' input
+        raw_fmt: [str], format of the raw input, default = 'rgb565' (only used for non-array input)
+        raw_size: [tuple], (raw_w, raw_h), required for non-array input
+        npu_size: [tuple], (npu_w, npu_h); when None the input is returned as-is
+        crop_box: [tuple], (x1, y1, x2, y2), if None will skip crop
+        pad_mode: [int], 0: pad 2 sides, 1: pad 1 side, 2: no pad. default = 0
+        norm: [str], default = 'kneron'
+            NOTE(review): accepted but not applied in this function (no set_normalize call) - confirm intent
+        gray: [bool], default = False
+
+    Returns:
+        out: [np.array]
+
+    Examples:
+        >>> image_data = kneron_preprocessing.API.inproc_720(image_data,npu_size=(56,56),crop_box=(272,145,460,341),pad_mode=1)
+    """
+    if (not isinstance(image, np.ndarray)):
+        flow_720.set_raw_img(is_raw_img='yes',raw_img_type = 'bin',raw_img_fmt=raw_fmt, img_in_width=raw_size[0], img_in_height=raw_size[1])
+    else:
+        flow_720.set_raw_img(is_raw_img='no')
+        flow_720.set_color_conversion(source_format='rgb888')
+
+    if npu_size is None:
+        return image
+
+    flow_720.set_model_size(w=npu_size[0],h=npu_size[1])
+
+    ## Crop
+    # Identity test: `!= None` is evaluated element-wise for array-like boxes
+    # and cannot be used as a condition.
+    if crop_box is not None:
+        flow_720.set_crop(start_x=crop_box[0],start_y=crop_box[1],end_x=crop_box[2],end_y=crop_box[3])
+
+    ## Color
+    if gray:
+        flow_720.set_color_conversion(out_format='l',simulation='no')
+    else:
+        flow_720.set_color_conversion(out_format='rgb888',simulation='no')
+
+    ## Resize & Pad
+    pad_mode = str2int(pad_mode)
+    if (pad_mode == 0):
+        pad_type = 'center'
+        resize_keep_ratio = 'yes'
+    elif (pad_mode == 1):
+        pad_type = 'corner'
+        resize_keep_ratio = 'yes'
+    else:
+        # Any other mode: the aspect ratio is not kept when resizing.
+        pad_type = 'center'
+        resize_keep_ratio = 'no'
+
+    flow_720.set_resize(keep_ratio=resize_keep_ratio)
+    flow_720.set_padding(type=pad_type)
+
+    ## 720 inproc
+    # No 720-specific setting call exists yet; the 520 equivalent was left disabled here:
+    # flow_720.set_720_setting(radix=radix,bit_width=bit_width,rotate=rotate,crop_fisrt=crop_fisrt,round_w_to_16=round_w_to_16,NUM_BANK_LINE=NUM_BANK_LINE,BANK_ENTRY_CNT=BANK_ENTRY_CNT,MAX_IMG_PREPROC_ROW_NUM=MAX_IMG_PREPROC_ROW_NUM,MAX_IMG_PREPROC_COL_NUM=MAX_IMG_PREPROC_COL_NUM)
+    image_data, _ = flow_720.run_whole_process(image)
+
+    return image_data
+
+def bit_match(data1, data2):
+    """
+    bit_match function
+
+    Check whether data1 is element-wise equal to data2.
+
+    Both inputs are flattened before the comparison, so only the number of
+    elements has to agree, not the shape.
+
+    Args:
+        data1: [np.array / str], an array or a .txt/.bin file path
+        data2: [np.array / str], an array or a .txt/.bin file path
+
+    Returns:
+        out1: [bool], True when every element matches
+        out2: [np.array], indices (into the flattened data) of the mismatched
+              elements; np.zeros((1)) on a match or on a length mismatch
+
+    Raises:
+        AssertionError: an input is neither an array nor a .bin/.txt path
+
+    Examples:
+        >>> result, mismatched = kneron_preprocessing.API.bit_match(data1,data2)
+    """
+    data1 = _load_match_data(data1)
+    data2 = _load_match_data(data2)
+
+    data1 = data1.reshape((-1,1))
+    data2 = data2.reshape((-1,1))
+
+    if len(data1) != len(data2):
+        print('error len')
+        return False, np.zeros((1))
+
+    # Compare with != rather than `data2 - data1 > 0`: the subtraction only
+    # flags elements where data2 is larger, so for signed or float data a
+    # smaller data2 value was reported as a match.
+    mismatched = np.where(data1 != data2)[0]
+    if len(mismatched) > 0:
+        print('error',mismatched)
+        return False, mismatched
+
+    print('pass')
+    return True, np.zeros((1))
+
+
+def _load_match_data(data):
+    """Return `data` as an array, reading it from a .bin / .txt path when a str is given."""
+    if isinstance(data, str):
+        extension = os.path.splitext(data)[1]
+        if extension == '.bin':
+            data = np.fromfile(data, dtype='uint8')
+        elif extension == '.txt':
+            data = np.loadtxt(data)
+    # A path with any other extension is still a str here and is rejected.
+    assert isinstance(data, np.ndarray)
+    return data
+
+def cpr_to_crp(x_start, x_end, y_start, y_end, pad_l, pad_r, pad_t, pad_b, rx_start, rx_end, ry_start, ry_end):
+    """
+    Convert the parameters of a crop->pad->resize flow into those of the
+    HW crop->resize->padding flow.
+
+    Each pad amount is rescaled by (resized extent) / (crop extent + both pads)
+    along its axis and rounded; the resize range is then shrunk by the
+    rescaled pads.
+
+    Args:
+        x_start, x_end, y_start, y_end: crop range, returned unchanged
+        pad_l, pad_r, pad_t, pad_b: pad sizes before the resize
+        rx_start, rx_end, ry_start, ry_end: resize range that includes the padding
+
+    Returns:
+        out: [tuple,12], (x_start, x_end, y_start, y_end, pad_l, pad_r, pad_t, pad_b,
+             rx_start, rx_end, ry_start, ry_end) with the pads and the resize range converted
+    """
+    # Take the extents from the ORIGINAL pad values. They used to be recomputed
+    # after pad_l / pad_t had already been rescaled, which skewed pad_r / pad_b.
+    padded_w = x_end - x_start + pad_l + pad_r
+    padded_h = y_end - y_start + pad_t + pad_b
+    resized_w = rx_end - rx_start
+    resized_h = ry_end - ry_start
+
+    pad_l = round(pad_l * resized_w / padded_w)
+    pad_r = round(pad_r * resized_w / padded_w)
+    pad_t = round(pad_t * resized_h / padded_h)
+    pad_b = round(pad_b * resized_h / padded_h)
+
+    rx_start +=pad_l
+    rx_end -=pad_r
+    ry_start +=pad_t
+    ry_end -=pad_b
+
+    return x_start, x_end, y_start, y_end, pad_l, pad_r, pad_t, pad_b, rx_start, rx_end, ry_start, ry_end
--- a/kneron/preprocessing/Cflow.py
+++ b/kneron/preprocessing/Cflow.py
@ -0,0 +1,172 @@
+import numpy as np
+import argparse
+import kneron_preprocessing
+
+def main_(args):
+    """
+    Load the input image, run the 520 preprocessing on it and dump the
+    result as an rgba bin file.
+
+    Args:
+        args: parsed command-line namespace (see the argparse setup in this file)
+    """
+    api = kneron_preprocessing.API
+
+    # Input: raw bin files need their format and size, anything else is decoded as an image.
+    if args.file_fmt == 'bin':
+        raw_size = (args.input_width, args.input_height)
+        image_data = api.load_bin(args.input_file, args.raw_format, raw_size)
+    else:
+        image_data = api.load_image(args.input_file)
+
+    # Crop box is [x1, y1, x2, y2]; only built when --crop_first is the literal "True".
+    crop_box = None
+    if args.crop_first == "True":
+        left, top = args.x_pos, args.y_pos
+        crop_box = [left, top, args.crop_w + left, args.crop_h + top]
+
+    image_data = api.inproc_520(
+        image_data,
+        npu_size=(args.width, args.height),
+        crop_box=crop_box,
+        pad_mode=args.pad_mode,
+        norm=args.norm_mode,
+        rotate=args.rotate_mode,
+        radix=args.radix,
+        bit_width=args.bitwidth,
+    )
+
+    api.dump_image(image_data, args.output_file, 'bin', 'rgba')
+
+
+if __name__ == "__main__":
+    # One row per option, in the order it is registered:
+    # (short flag, long flag, value type or None for plain strings, help text)
+    option_table = [
+        ('-i', '--input_file', None, "input file name"),
+        ('-ff', '--file_fmt', None, "input file format, jpg or bin"),
+        ('-rf', '--raw_format', None, "input file image format, rgb or rgb565 or nir"),
+        ('-i_w', '--input_width', int, "input image width"),
+        ('-i_h', '--input_height', int, "input image height"),
+        ('-o', '--output_file', None, "output file name"),
+        ('-s_w', '--width', int, "output width for npu input"),
+        ('-s_h', '--height', int, "output height for npu input"),
+        ('-c_f', '--crop_first', None, "crop first True or False"),
+        ('-x', '--x_pos', int, "left up coordinate x"),
+        ('-y', '--y_pos', int, "left up coordinate y"),
+        ('-c_w', '--crop_w', int, "crop width"),
+        ('-c_h', '--crop_h', int, "crop height"),
+        ('-p_m', '--pad_mode', int, " 0: pad 2 sides, 1: pad 1 side, 2: no pad."),
+        ('-n_m', '--norm_mode', None, "normalizaton mode: yolo, kneron, tf."),
+        ('-r_m', '--rotate_mode', int, "rotate mode:0,1,2"),
+        ('-bw', '--bitwidth', int, "Int for bitwidth"),
+        ('-r', '--radix', int, "Int for radix"),
+    ]
+
+    argparser = argparse.ArgumentParser(description="preprocessing")
+    for short_flag, long_flag, value_type, help_text in option_table:
+        # Only pass `type` for the options that declare one.
+        extra = {} if value_type is None else {'type': value_type}
+        argparser.add_argument(short_flag, long_flag, help=help_text, **extra)
+
+    args = argparser.parse_args()
+    main_(args)
--- a/kneron/preprocessing/Flow.py
+++ b/kneron/preprocessing/Flow.py
--- a/kneron/preprocessing/init.py
+++ b/kneron/preprocessing/init.py
@ -0,0 +1,2 @@
+from .Flow import *
+from .API import *
--- a/kneron/preprocessing/funcs/ColorConversion.py
+++ b/kneron/preprocessing/funcs/ColorConversion.py
@ -0,0 +1,285 @@
+import numpy as np
+from PIL import Image
+from .utils import signed_rounding, clip, str2bool
+
+# Number of fractional bits of the fixed-point coefficients below: the
+# fixed-point conversion paths call signed_rounding(value, format_bit) and then
+# shift the result right by format_bit.
+format_bit = 10
+# Fixed-point YUV -> RGB coefficients used by
+# runner._ColorConversion_YUV_to_RGB888. Naming is c<row><col>: row 0/1/2 is
+# the R/G/B output, col 0/1/2 is the Y/U/V input. The luma coefficients are 1
+# because that method multiplies Y by 1024 (== 2**format_bit) itself.
+c00_yuv = 1
+c02_yuv = 1436
+c10_yuv = 1
+c11_yuv = -354
+c12_yuv = -732
+c20_yuv = 1
+c21_yuv = 1814
+# Fixed-point YCbCr -> RGB coefficients (same c<row><col> naming) used by the
+# fixed path of runner._ColorConversion_YCbCr_to_RGB888. They approximate
+# Matrix_ycbcr_to_rgb888 scaled by 2**format_bit (e.g. 1192 / 1024 ~= 1.164).
+# Only c00_ycbcr is applied to luma there; c10_ycbcr and c20_ycbcr are not
+# referenced by the code visible in this file.
+c00_ycbcr = 1192
+c02_ycbcr = 1634
+c10_ycbcr = 1192
+c11_ycbcr = -401
+c12_ycbcr = -833
+c20_ycbcr = 1192
+c21_ycbcr = 2065
+
+# Floating-point conversion matrices. They are applied as ``pixels @ Matrix``,
+# so each ROW belongs to an input channel and each COLUMN to an output channel.
+
+# Input rows: (Y - 16, Cb - 128, Cr - 128); output columns: R, G, B.
+Matrix_ycbcr_to_rgb888 = np.array(
+    [[1.16438356e+00,  1.16438356e+00,  1.16438356e+00],
+     [2.99747219e-07, - 3.91762529e-01,  2.01723263e+00],
+     [1.59602686e+00, - 8.12968294e-01,  3.04059479e-06]])
+
+# Input rows: R, G, B; output columns: Y, Cb, Cr (before the +16 / +128 / +128
+# offsets added by runner._ColorConversion_RGB888_to_YCbCr444).
+Matrix_rgb888_to_ycbcr = np.array(
+    [[0.25678824, - 0.14822353,  0.43921569],
+     [0.50412941, - 0.29099216, - 0.36778824],
+     [0.09790588,  0.43921569, - 0.07142745]])
+
+# Input rows: R, G, B; output columns: Y, U, V (no offsets are added; see
+# runner._ColorConversion_RGB888_to_YUV444).
+Matrix_rgb888_to_yuv = np.array(
+    [[ 0.29899106, -0.16877996,  0.49988381],
+    [ 0.5865453,  -0.33110385, -0.41826072],
+    [ 0.11446364,  0.49988381, -0.08162309]])
+
+# The two matrices below are commented out and therefore unused; they are kept
+# only as alternative coefficient sets for reference.
+# Matrix_rgb888_to_yuv = np.array(
+#     [[0.299, - 0.147,   0.615],
+#      [0.587, - 0.289, - 0.515],
+#      [0.114,   0.436, - 0.100]])
+
+# Matrix_yuv_to_rgb888 = np.array(
+#     [[1.000,   1.000,  1.000],
+#      [0.000, - 0.394,  2.032],
+#      [1.140, - 0.581,  0.000]])
+
+class runner(object):
+    def __init__(self):
+        self.set = {
+            'print_info':'no',
+            'model_size':[0,0],
+            'numerical_type':'floating',
+            "source_format": "rgb888",
+            "out_format": "rgb888",
+            "options": {
+                "simulation": "no",
+                "simulation_format": "rgb888"
+            }
+        }
+
+    def update(self, **kwargs):
+        #
+        self.set.update(kwargs)
+
+        ## simulation
+        self.funs = []
+        if str2bool(self.set['options']['simulation']) and self.set['source_format'].lower() in ['RGB888', 'rgb888', 'RGB', 'rgb']:
+            if self.set['options']['simulation_format'].lower() in ['YUV422', 'yuv422', 'YUV', 'yuv']:
+                self.funs.append(self._ColorConversion_RGB888_to_YUV422)
+                self.set['source_format'] = 'YUV422'
+            elif self.set['options']['simulation_format'].lower() in ['YCBCR422', 'YCbCr422', 'ycbcr422', 'YCBCR', 'YCbCr', 'ycbcr']:
+                self.funs.append(self._ColorConversion_RGB888_to_YCbCr422)
+                self.set['source_format'] = 'YCbCr422'
+            elif self.set['options']['simulation_format'].lower() in['RGB565', 'rgb565']:
+                self.funs.append(self._ColorConversion_RGB888_to_RGB565)
+                self.set['source_format'] = 'RGB565'
+        
+        ## to rgb888
+        if self.set['source_format'].lower() in ['YUV444', 'yuv444','YUV422', 'yuv422', 'YUV', 'yuv']:
+            self.funs.append(self._ColorConversion_YUV_to_RGB888)
+        elif self.set['source_format'].lower() in ['YCBCR444', 'YCbCr444', 'ycbcr444','YCBCR422', 'YCbCr422', 'ycbcr422', 'YCBCR', 'YCbCr', 'ycbcr']:
+            self.funs.append(self._ColorConversion_YCbCr_to_RGB888)
+        elif self.set['source_format'].lower() in ['RGB565', 'rgb565']:
+            self.funs.append(self._ColorConversion_RGB565_to_RGB888)
+        elif self.set['source_format'].lower() in ['l', 'L' , 'nir', 'NIR']:
+            self.funs.append(self._ColorConversion_L_to_RGB888)
+        elif self.set['source_format'].lower() in ['RGBA8888', 'rgba8888' , 'RGBA', 'rgba']:
+            self.funs.append(self._ColorConversion_RGBA8888_to_RGB888)
+
+        ## output format
+        if self.set['out_format'].lower() in ['L', 'l']:
+            self.funs.append(self._ColorConversion_RGB888_to_L)
+        elif self.set['out_format'].lower() in['RGB565', 'rgb565']:
+            self.funs.append(self._ColorConversion_RGB888_to_RGB565)
+        elif self.set['out_format'].lower() in['RGBA', 'RGBA8888','rgba','rgba8888']:
+            self.funs.append(self._ColorConversion_RGB888_to_RGBA8888)
+        elif self.set['out_format'].lower() in['YUV', 'YUV444','yuv','yuv444']:
+            self.funs.append(self._ColorConversion_RGB888_to_YUV444)
+        elif self.set['out_format'].lower() in['YUV422','yuv422']:
+            self.funs.append(self._ColorConversion_RGB888_to_YUV422)
+        elif self.set['out_format'].lower() in['YCBCR', 'YCBCR444','YCbCr','YCbCr444','ycbcr','ycbcr444']:
+            self.funs.append(self._ColorConversion_RGB888_to_YCbCr444)
+        elif self.set['out_format'].lower() in['YCBCR422','YCbCr422','ycbcr422']:
+            self.funs.append(self._ColorConversion_RGB888_to_YCbCr422)
+
+    def print_info(self):
+        print("<colorConversion>",
+              "source_format:", self.set['source_format'],
+              ', out_format:', self.set['out_format'],
+              ', simulation:', self.set['options']['simulation'],
+              ', simulation_format:', self.set['options']['simulation_format'])
+
+    def run(self, image_data):
+        assert isinstance(image_data, np.ndarray)
+        # print info
+        if str2bool(self.set['print_info']):
+            self.print_info()
+
+        # color
+        for _, f in enumerate(self.funs):
+            image_data = f(image_data)
+
+        # output
+        info = {}
+        return image_data, info
+
+    def _ColorConversion_RGB888_to_YUV444(self, image):
+        ## floating
+        image = image.astype('float')
+        image = (image @ Matrix_rgb888_to_yuv + 0.5).astype('uint8')
+        return image
+
+    def _ColorConversion_RGB888_to_YUV422(self, image):
+        # rgb888 to yuv444
+        image = self._ColorConversion_RGB888_to_YUV444(image)
+
+        # yuv444 to yuv422
+        u2 = image[:, 0::2, 1]
+        u4 = np.repeat(u2, 2, axis=1)
+        v2 = image[:, 1::2, 2]
+        v4 = np.repeat(v2, 2, axis=1)
+        image[..., 1] = u4
+        image[..., 2] = v4
+        return image
+           
+    def _ColorConversion_YUV_to_RGB888(self, image):
+        ## fixed
+        h, w, c = image.shape
+        image_f = image.reshape((h * w, c))
+        image_rgb_f = np.zeros(image_f.shape, dtype=np.uint8)
+
+        for i in range(h * w):
+            image_y = image_f[i, 0] *1024
+            if image_f[i, 1] > 127:
+                image_u = -((~(image_f[i, 1] - 1)) & 0xFF)
+            else:
+                image_u = image_f[i, 1]
+            if image_f[i, 2] > 127:
+                image_v = -((~(image_f[i, 2] - 1)) & 0xFF)
+            else:
+                image_v = image_f[i, 2]
+
+            image_r = c00_yuv * image_y + c02_yuv * image_v
+            image_g = c10_yuv * image_y + c11_yuv * image_u + c12_yuv * image_v
+            image_b = c20_yuv * image_y + c21_yuv * image_u
+
+            image_r = signed_rounding(image_r, format_bit)
+            image_g = signed_rounding(image_g, format_bit)
+            image_b = signed_rounding(image_b, format_bit)
+
+            image_r = image_r >> format_bit
+            image_g = image_g >> format_bit
+            image_b = image_b >> format_bit
+
+            image_rgb_f[i, 0] = clip(image_r, 0, 255)
+            image_rgb_f[i, 1] = clip(image_g, 0, 255)
+            image_rgb_f[i, 2] = clip(image_b, 0, 255)
+
+        image_rgb = image_rgb_f.reshape((h, w, c))
+        return image_rgb
+
+    def _ColorConversion_RGB888_to_YCbCr444(self, image):
+        ## floating
+        image = image.astype('float')
+        image = (image @ Matrix_rgb888_to_ycbcr + 0.5).astype('uint8')
+        image[:, :, 0] += 16
+        image[:, :, 1] += 128
+        image[:, :, 2] += 128
+
+        return image
+
+    def _ColorConversion_RGB888_to_YCbCr422(self, image):
+        # rgb888 to ycbcr444
+        image = self._ColorConversion_RGB888_to_YCbCr444(image)
+
+        # ycbcr444 to ycbcr422
+        cb2 = image[:, 0::2, 1]
+        cb4 = np.repeat(cb2, 2, axis=1)
+        cr2 = image[:, 1::2, 2]
+        cr4 = np.repeat(cr2, 2, axis=1)
+        image[..., 1] = cb4
+        image[..., 2] = cr4
+        return image
+
+    def _ColorConversion_YCbCr_to_RGB888(self, image):
+        ## floating
+        if (self.set['numerical_type'] == 'floating'):
+            image = image.astype('float')
+            image[:, :, 0] -= 16
+            image[:, :, 1] -= 128
+            image[:, :, 2] -= 128
+            image = ((image @ Matrix_ycbcr_to_rgb888) + 0.5).astype('uint8')
+            return image
+
+        ## fixed
+        h, w, c = image.shape
+        image_f = image.reshape((h * w, c))
+        image_rgb_f = np.zeros(image_f.shape, dtype=np.uint8)
+
+        for i in range(h * w):
+            image_y = (image_f[i, 0] - 16) * c00_ycbcr
+            image_cb = image_f[i, 1] - 128
+            image_cr = image_f[i, 2] - 128
+
+            image_r = image_y + c02_ycbcr * image_cr
+            image_g = image_y + c11_ycbcr * image_cb + c12_ycbcr * image_cr
+            image_b = image_y + c21_ycbcr * image_cb
+
+            image_r = signed_rounding(image_r, format_bit)
+            image_g = signed_rounding(image_g, format_bit)
+            image_b = signed_rounding(image_b, format_bit)
+
+            image_r = image_r >> format_bit
+            image_g = image_g >> format_bit
+            image_b = image_b >> format_bit
+
+            image_rgb_f[i, 0] = clip(image_r, 0, 255)
+            image_rgb_f[i, 1] = clip(image_g, 0, 255)
+            image_rgb_f[i, 2] = clip(image_b, 0, 255)
+
+        image_rgb = image_rgb_f.reshape((h, w, c))
+        return image_rgb
+
+    def _ColorConversion_RGB888_to_RGB565(self, image):
+        assert (len(image.shape)==3)
+        assert (image.shape[2]>=3)
+        
+        image_rgb565 = np.zeros(image.shape, dtype=np.uint8)
+        image_rgb = image.astype('uint8')
+        image_rgb565[:, :, 0] = image_rgb[:, :, 0] >> 3
+        image_rgb565[:, :, 1] = image_rgb[:, :, 1] >> 2
+        image_rgb565[:, :, 2] = image_rgb[:, :, 2] >> 3
+        return image_rgb565
+
+    def _ColorConversion_RGB565_to_RGB888(self, image):
+        assert (len(image.shape)==3)
+        assert (image.shape[2]==3)
+
+        image_rgb = np.zeros(image.shape, dtype=np.uint8)
+        image_rgb[:, :, 0] = image[:, :, 0] << 3
+        image_rgb[:, :, 1] = image[:, :, 1] << 2
+        image_rgb[:, :, 2] = image[:, :, 2] << 3
+        return image_rgb
+
+    def _ColorConversion_L_to_RGB888(self, image):
+        image_L = image.astype('uint8')
+        img = Image.fromarray(image_L).convert('RGB')
+        image_data = np.array(img).astype('uint8')
+        return image_data
+
+    def _ColorConversion_RGB888_to_L(self, image):
+        image_rgb = image.astype('uint8')
+        img = Image.fromarray(image_rgb).convert('L')
+        image_data = np.array(img).astype('uint8')
+        return image_data
+
+    def _ColorConversion_RGBA8888_to_RGB888(self, image):
+        assert (len(image.shape)==3)
+        assert (image.shape[2]==4)
+        return image[:,:,:3]
+
+    def _ColorConversion_RGB888_to_RGBA8888(self, image):
+        assert (len(image.shape)==3)
+        assert (image.shape[2]==3)
+        imageA = np.concatenate((image, np.zeros((image.shape[0], image.shape[1], 1), dtype=np.uint8) ), axis=2)
+        return imageA
--- a/kneron/preprocessing/funcs/Crop.py
+++ b/kneron/preprocessing/funcs/Crop.py
@ -0,0 +1,145 @@
+import numpy as np
+from PIL import Image
+from .utils import str2int, str2float, str2bool, pad_square_to_4
+from .utils_520 import round_up_n
+from .Runner_base import Runner_base, Param_base
+
+class General(Param_base):
+    type = 'center'
+    align_w_to_4 = False
+    pad_square_to_4 = False
+    rounding_type = 0
+    crop_w = 0
+    crop_h = 0
+    start_x = 0.
+    start_y = 0.
+    end_x = 0.
+    end_y = 0.
+    def update(self, **dic):
+        self.type = dic['type']
+        self.align_w_to_4 = str2bool(dic['align_w_to_4'])
+        self.rounding_type = str2int(dic['rounding_type'])
+        self.crop_w = str2int(dic['crop_w'])
+        self.crop_h = str2int(dic['crop_h'])
+        self.start_x = str2float(dic['start_x'])
+        self.start_y = str2float(dic['start_y'])
+        self.end_x = str2float(dic['end_x'])
+        self.end_y = str2float(dic['end_y'])
+
+    def __str__(self):
+        str_out = [
+            ', type:',str(self.type),
+            ', align_w_to_4:',str(self.align_w_to_4),
+            ', pad_square_to_4:',str(self.pad_square_to_4),
+            ', crop_w:',str(self.crop_w),
+            ', crop_h:',str(self.crop_h),
+            ', start_x:',str(self.start_x),
+            ', start_y:',str(self.start_y),
+            ', end_x:',str(self.end_x),
+            ', end_y:',str(self.end_y)]
+        return(' '.join(str_out))
+       
+class runner(Runner_base):
+    ## overwrite the class in Runner_base
+    general = General()
+
+    def __str__(self):
+        return('<Crop>')
+
+    def update(self, **kwargs):
+        ##
+        super().update(**kwargs)
+
+        ##
+        if (self.general.start_x != self.general.end_x) and (self.general.start_y != self.general.end_y):
+            self.general.type = 'specific'
+        elif(self.general.type != 'specific'):
+            if self.general.crop_w == 0 or self.general.crop_h == 0:
+                self.general.crop_w = self.common.model_size[0]
+                self.general.crop_h = self.common.model_size[1]
+            assert(self.general.crop_w > 0)
+            assert(self.general.crop_h > 0)
+            assert(self.general.type.lower() in ['CENTER', 'Center', 'center', 'CORNER', 'Corner', 'corner'])
+        else:
+            assert(self.general.type == 'specific')
+
+    def run(self, image_data):
+        ## init
+        img = Image.fromarray(image_data)
+        w, h = img.size
+
+        ## get range
+        if self.general.type.lower() in ['CENTER', 'Center', 'center']:
+            x1, y1, x2, y2 = self._calcuate_xy_center(w, h)
+        elif self.general.type.lower() in ['CORNER', 'Corner', 'corner']:
+            x1, y1, x2, y2 = self._calcuate_xy_corner(w, h)
+        else:
+            x1 = self.general.start_x
+            y1 = self.general.start_y
+            x2 = self.general.end_x
+            y2 = self.general.end_y
+            assert( ((x1 != x2) and (y1 != y2)) )
+
+        ## rounding
+        if self.general.rounding_type == 0:
+            x1 = int(np.floor(x1))
+            y1 = int(np.floor(y1))
+            x2 = int(np.ceil(x2))
+            y2 = int(np.ceil(y2))
+        else:
+            x1 = int(round(x1))
+            y1 = int(round(y1))
+            x2 = int(round(x2))
+            y2 = int(round(y2))
+
+        if self.general.align_w_to_4:
+            # x1 = (x1+1) &(~3)  #//+2
+            # x2 = (x2+2) &(~3)  #//+1
+            x1 = (x1+3) &(~3)  #//+2
+            left = w - x2
+            left = (left+3) &(~3)
+            x2 = w - left
+
+        ## pad_square_to_4
+        if str2bool(self.general.pad_square_to_4):
+            x1,x2,y1,y2 = pad_square_to_4(x1,x2,y1,y2)
+
+        # do crop
+        box = (x1,y1,x2,y2)
+        img = img.crop(box)
+
+        # print info
+        if str2bool(self.common.print_info):
+            self.general.start_x = x1
+            self.general.start_y = y1
+            self.general.end_x = x2
+            self.general.end_y = y2
+            self.general.crop_w = x2 - x1
+            self.general.crop_h = y2 - y1
+            self.print_info()
+
+        # output
+        image_data = np.array(img)
+        info = {}
+        info['box'] = box
+
+        return image_data, info
+
+
+    ## protect fun
+    def _calcuate_xy_center(self, w, h):
+        x1 = w/2 - self.general.crop_w / 2
+        y1 = h/2 - self.general.crop_h / 2
+        x2 = w/2 + self.general.crop_w / 2
+        y2 = h/2 + self.general.crop_h / 2
+        return x1, y1, x2, y2
+
+    def _calcuate_xy_corner(self, _1, _2):
+        x1 = 0
+        y1 = 0
+        x2 = self.general.crop_w
+        y2 = self.general.crop_h
+        return x1, y1, x2, y2
+
+    def do_crop(self, image_data, startW, startH, endW, endH):
+        return image_data[startH:endH, startW:endW, :]
--- a/kneron/preprocessing/funcs/Normalize.py
+++ b/kneron/preprocessing/funcs/Normalize.py
@ -0,0 +1,186 @@
+import numpy as np
+from .utils import str2bool, str2int, str2float, clip_ary
+
+class runner(object):
+    def __init__(self):
+        self.set = {
+            'general': {
+                'print_info':'no',
+                'model_size':[0,0],
+                'numerical_type':'floating',
+                'type': 'kneron'
+            },
+            'floating':{
+                "scale": 1,
+                "bias": 0,
+                "mean": "",
+                "std": "",
+            },
+            'hw':{
+                "radix":8,
+                "shift":"",
+                "sub":""
+            }
+        }
+        return
+
+    def update(self, **kwargs):
+        #
+        self.set.update(kwargs)
+
+        #
+        if self.set['general']['numerical_type'] == '520':
+            if self.set['general']['type'].lower() in ['TF', 'Tf', 'tf']:
+                self.fun_normalize = self._chen_520
+                self.shift = 7 - self.set['hw']['radix']
+                self.sub = 128
+            elif self.set['general']['type'].lower() in ['YOLO', 'Yolo', 'yolo']:
+                self.fun_normalize = self._chen_520
+                self.shift = 8 - self.set['hw']['radix']
+                self.sub = 0
+            elif self.set['general']['type'].lower() in ['KNERON', 'Kneron', 'kneron']:
+                self.fun_normalize = self._chen_520
+                self.shift = 8 - self.set['hw']['radix']
+                self.sub = 128
+            else:
+                self.fun_normalize = self._chen_520
+                self.shift = 0
+                self.sub = 0      
+        elif self.set['general']['numerical_type'] == '720':
+                self.fun_normalize = self._chen_720
+                self.shift = 0
+                self.sub = 0                   
+        else:
+            if self.set['general']['type'].lower() in ['TORCH', 'Torch', 'torch']:
+                self.fun_normalize = self._normalize_torch
+                self.set['floating']['scale'] = 255.
+                self.set['floating']['mean'] = [0.485, 0.456, 0.406]
+                self.set['floating']['std'] = [0.229, 0.224, 0.225]
+            elif self.set['general']['type'].lower() in ['TF', 'Tf', 'tf']:
+                self.fun_normalize = self._normalize_tf
+                self.set['floating']['scale'] = 127.5
+                self.set['floating']['bias'] = -1.
+            elif self.set['general']['type'].lower() in ['CAFFE', 'Caffe', 'caffe']:
+                self.fun_normalize = self._normalize_caffe
+                self.set['floating']['mean'] = [103.939, 116.779, 123.68]
+            elif self.set['general']['type'].lower() in ['YOLO', 'Yolo', 'yolo']:
+                self.fun_normalize = self._normalize_yolo
+                self.set['floating']['scale'] = 255.
+            elif self.set['general']['type'].lower() in ['KNERON', 'Kneron', 'kneron']:
+                self.fun_normalize = self._normalize_kneron
+                self.set['floating']['scale'] = 256.
+                self.set['floating']['bias'] = -0.5
+            else:
+                self.fun_normalize = self._normalize_customized
+                self.set['floating']['scale'] = str2float(self.set['floating']['scale'])
+                self.set['floating']['bias'] = str2float(self.set['floating']['bias'])
+                if self.set['floating']['mean'] != None:
+                    if len(self.set['floating']['mean']) != 3:
+                        self.set['floating']['mean'] = None
+                if self.set['floating']['std'] != None:
+                    if len(self.set['floating']['std']) != 3:
+                        self.set['floating']['std'] = None
+
+
+    def print_info(self):
+        if self.set['general']['numerical_type'] == '520':
+            print("<normalize>",
+            'numerical_type', self.set['general']['numerical_type'],
+            ", type:", self.set['general']['type'],
+            ', shift:',self.shift, 
+            ', sub:', self.sub)
+        else:
+            print("<normalize>",
+            'numerical_type', self.set['general']['numerical_type'],
+            ", type:", self.set['general']['type'],
+            ', scale:',self.set['floating']['scale'], 
+            ', bias:', self.set['floating']['bias'],
+            ', mean:', self.set['floating']['mean'],
+            ', std:',self.set['floating']['std'])
+
+    def run(self, image_data):
+        # print info
+        if str2bool(self.set['general']['print_info']):
+            self.print_info()
+
+        # norm
+        image_data = self.fun_normalize(image_data)
+
+        # output
+        info = {}
+        return image_data, info
+
+    def _normalize_torch(self, x):
+        if len(x.shape) != 3:
+            return x
+        x = x.astype('float')
+        x = x / self.set['floating']['scale']
+        x[..., 0] -= self.set['floating']['mean'][0]
+        x[..., 1] -= self.set['floating']['mean'][1]
+        x[..., 2] -= self.set['floating']['mean'][2]
+        x[..., 0] /= self.set['floating']['std'][0]
+        x[..., 1] /= self.set['floating']['std'][1]
+        x[..., 2] /= self.set['floating']['std'][2]
+        return x
+
+    def _normalize_tf(self, x):
+        # print('_normalize_tf')
+        x = x.astype('float')
+        x = x / self.set['floating']['scale']
+        x = x + self.set['floating']['bias']
+        return x
+
+    def _normalize_caffe(self, x):
+        if len(x.shape) != 3:
+            return x
+        x = x.astype('float')
+        x = x[..., ::-1]
+        x[..., 0] -= self.set['floating']['mean'][0]
+        x[..., 1] -= self.set['floating']['mean'][1]
+        x[..., 2] -= self.set['floating']['mean'][2]
+        return x
+
+    def _normalize_yolo(self, x):
+        # print('_normalize_yolo')
+        x = x.astype('float')
+        x = x / self.set['floating']['scale']
+        return x
+
+    def _normalize_kneron(self, x):
+        # print('_normalize_kneron')
+        x = x.astype('float')
+        x = x/self.set['floating']['scale']
+        x = x + self.set['floating']['bias']
+        return x
+
+    def _normalize_customized(self, x):
+        # print('_normalize_customized')
+        x = x.astype('float')
+        if  self.set['floating']['scale'] != 0:
+            x = x/ self.set['floating']['scale'] 
+        x = x + self.set['floating']['bias'] 
+        if self.set['floating']['mean'] is not None:
+            x[..., 0] -= self.set['floating']['mean'][0]
+            x[..., 1] -= self.set['floating']['mean'][1]
+            x[..., 2] -= self.set['floating']['mean'][2]
+        if self.set['floating']['std'] is not None:
+            x[..., 0] /= self.set['floating']['std'][0]
+            x[..., 1] /= self.set['floating']['std'][1]
+            x[..., 2] /= self.set['floating']['std'][2]
+
+        return x
+
+    def _chen_520(self, x):
+        # print('_chen_520')
+        x = (x - self.sub).astype('uint8')
+        x = (np.right_shift(x,self.shift))
+        x=x.astype('uint8')
+        return x
+
+    def _chen_720(self, x):
+        # print('_chen_720')
+        if self.shift == 1:
+            x = x + np.array([[self.sub], [self.sub], [self.sub]])
+        else:
+            x = x + np.array([[self.sub], [self.sub], [self.sub]])
+        return x
--- a/kneron/preprocessing/funcs/Padding.py
+++ b/kneron/preprocessing/funcs/Padding.py
@ -0,0 +1,187 @@
+import numpy as np
+from PIL import Image
+from .utils import str2bool, str2int, str2float
+from .Runner_base import Runner_base, Param_base
+
+class General(Param_base):
+    type = ''
+    pad_val = ''
+    padded_w = ''
+    padded_h = ''
+    pad_l = ''
+    pad_r = ''
+    pad_t = ''
+    pad_b = ''
+    padding_ch = 3
+    padding_ch_type = 'RGB'
+    def update(self, **dic):
+        self.type = dic['type']
+        self.pad_val = dic['pad_val']
+        self.padded_w = str2int(dic['padded_w'])
+        self.padded_h = str2int(dic['padded_h'])
+        self.pad_l = str2int(dic['pad_l'])
+        self.pad_r = str2int(dic['pad_r'])
+        self.pad_t = str2int(dic['pad_t'])
+        self.pad_b = str2int(dic['pad_b'])
+
+    def __str__(self):
+        str_out = [
+            ', type:',str(self.type),
+            ', pad_val:',str(self.pad_val),
+            ', pad_l:',str(self.pad_l),
+            ', pad_r:',str(self.pad_r),
+            ', pad_r:',str(self.pad_t),
+            ', pad_b:',str(self.pad_b),
+            ', padding_ch:',str(self.padding_ch)]
+        return(' '.join(str_out))
+
+class Hw(Param_base):
+    radix = 8
+    normalize_type = 'floating'
+    def update(self, **dic):
+        self.radix = dic['radix']
+        self.normalize_type = dic['normalize_type']
+
+    def __str__(self):
+        str_out = [
+            ', radix:', str(self.radix),
+            ', normalize_type:',str(self.normalize_type)]
+        return(' '.join(str_out))
+
+
+class runner(Runner_base):
+    ## overwrite the class in Runner_base
+    general = General()
+    hw = Hw()
+
+    def __str__(self):
+        return('<Padding>')
+
+    def update(self, **kwargs):
+        super().update(**kwargs)
+
+        ## update pad type & pad length
+        if (self.general.pad_l != 0) or (self.general.pad_r != 0) or (self.general.pad_t != 0) or (self.general.pad_b != 0):
+            self.general.type = 'specific'
+            assert(self.general.pad_l >= 0)
+            assert(self.general.pad_r >= 0)
+            assert(self.general.pad_t >= 0)
+            assert(self.general.pad_b >= 0)
+        elif(self.general.type != 'specific'):
+            if self.general.padded_w == 0 or self.general.padded_h == 0:
+                self.general.padded_w = self.common.model_size[0]
+                self.general.padded_h = self.common.model_size[1]
+            assert(self.general.padded_w > 0)
+            assert(self.general.padded_h > 0)
+            assert(self.general.type.lower() in ['CENTER', 'Center', 'center', 'CORNER', 'Corner', 'corner'])
+        else:
+            assert(self.general.type == 'specific')
+            
+        ## decide pad_val & padding ch
+        # if numerical_type is floating
+        if (self.common.numerical_type == 'floating'):
+            if self.general.pad_val != 'edge':
+                self.general.pad_val = str2float(self.general.pad_val)
+            self.general.padding_ch = 3
+            self.general.padding_ch_type = 'RGB'
+        # if numerical_type is 520 or 720
+        else: 
+            if self.general.pad_val == '':
+                if self.hw.normalize_type.lower() in ['TF', 'Tf', 'tf']:
+                    self.general.pad_val = np.uint8(-128 >> (7 - self.hw.radix))
+                elif self.hw.normalize_type.lower() in ['YOLO', 'Yolo', 'yolo']:
+                    self.general.pad_val = np.uint8(0 >> (8 - self.hw.radix))
+                elif self.hw.normalize_type.lower() in ['KNERON', 'Kneron', 'kneron']:
+                    self.general.pad_val = np.uint8(-128 >> (8 - self.hw.radix))
+                else:
+                    self.general.pad_val = np.uint8(0 >> (8 - self.hw.radix))
+            else:
+                self.general.pad_val = str2int(self.general.pad_val)
+            self.general.padding_ch = 4
+            self.general.padding_ch_type = 'RGBA'
+
+    def run(self, image_data):
+        # init
+        shape = image_data.shape
+        w = shape[1]
+        h = shape[0]
+        if len(shape) < 3:
+            self.general.padding_ch = 1
+            self.general.padding_ch_type = 'L'
+        else:
+            if shape[2] == 3 and self.general.padding_ch == 4:
+                image_data = np.concatenate((image_data, np.zeros((h, w, 1), dtype=np.uint8) ), axis=2)
+                
+        ## padding
+        if self.general.type.lower() in ['CENTER',  'Center',  'center']:
+            img_pad = self._padding_center(image_data, w, h)
+        elif self.general.type.lower() in ['CORNER',  'Corner',  'corner']:
+            img_pad = self._padding_corner(image_data, w, h)
+        else:
+            img_pad = self._padding_sp(image_data, w, h)
+
+        # print info
+        if str2bool(self.common.print_info):
+            self.print_info()
+
+        # output
+        info = {}
+        return img_pad, info
+
+    ## protect fun
+    def _padding_center(self, img, ori_w, ori_h):
+        # img_pad = Image.new(self.general.padding_ch_type, (self.general.padded_w, self.general.padded_h), int(self.general.pad_val[0]))
+        # img = Image.fromarray(img)
+        # img_pad.paste(img, ((self.general.padded_w-ori_w)//2, (self.general.padded_h-ori_h)//2))
+        # return img_pad
+        padH = self.general.padded_h - ori_h
+        padW = self.general.padded_w - ori_w
+        self.general.pad_t = padH // 2
+        self.general.pad_b = (padH // 2) + (padH % 2)
+        self.general.pad_l = padW // 2
+        self.general.pad_r = (padW // 2) + (padW % 2)
+        if self.general.pad_l < 0 or self.general.pad_r <0 or self.general.pad_t <0 or self.general.pad_b<0:
+            return img
+        img_pad = self._padding_sp(img,ori_w,ori_h)
+        return img_pad
+
+    def _padding_corner(self, img, ori_w, ori_h):
+        # img_pad = Image.new(self.general.padding_ch_type, (self.general.padded_w, self.general.padded_h), self.general.pad_val)
+        # img_pad.paste(img, (0, 0))
+        self.general.pad_l = 0
+        self.general.pad_r = self.general.padded_w - ori_w
+        self.general.pad_t = 0
+        self.general.pad_b = self.general.padded_h - ori_h
+        if self.general.pad_l < 0 or self.general.pad_r <0 or self.general.pad_t <0 or self.general.pad_b<0:
+            return img
+        img_pad = self._padding_sp(img,ori_w,ori_h)
+        return img_pad
+
+    def _padding_sp(self, img, ori_w, ori_h):
+        # block_t = np.zeros((self.general.pad_t, self.general.pad_l + self.general.pad_r + ori_w, self.general.padding_ch), dtype=np.float)
+        # block_l = np.zeros((ori_h, self.general.pad_l, self.general.padding_ch), dtype=np.float)
+        # block_r = np.zeros((ori_h, self.general.pad_r, self.general.padding_ch), dtype=np.float)
+        # block_b = np.zeros((self.general.pad_b, self.general.pad_l + self.general.pad_r + ori_w, self.general.padding_ch), dtype=np.float)
+        # for i in range(self.general.padding_ch):
+        #     block_t[:, :, i] = np.ones(block_t[:, :, i].shape, dtype=np.float) * self.general.pad_val
+        #     block_l[:, :, i] = np.ones(block_l[:, :, i].shape, dtype=np.float) * self.general.pad_val
+        #     block_r[:, :, i] = np.ones(block_r[:, :, i].shape, dtype=np.float) * self.general.pad_val
+        #     block_b[:, :, i] = np.ones(block_b[:, :, i].shape, dtype=np.float) * self.general.pad_val
+        # padded_image_hor = np.concatenate((block_l, img, block_r), axis=1)
+        # padded_image = np.concatenate((block_t, padded_image_hor, block_b), axis=0)
+        # return padded_image
+        if self.general.padding_ch == 1:
+            pad_range = ( (self.general.pad_t, self.general.pad_b),(self.general.pad_l, self.general.pad_r) )
+        else:
+            pad_range = ((self.general.pad_t, self.general.pad_b),(self.general.pad_l, self.general.pad_r),(0,0))
+
+        if isinstance(self.general.pad_val, str):
+            if self.general.pad_val == 'edge':
+                padded_image = np.pad(img, pad_range, mode="edge")
+            else:
+                padded_image = np.pad(img, pad_range, mode="constant",constant_values=0)
+        else:
+            padded_image = np.pad(img, pad_range, mode="constant",constant_values=self.general.pad_val)
+        
+        return padded_image
+
--- a/kneron/preprocessing/funcs/Resize.py
+++ b/kneron/preprocessing/funcs/Resize.py
@ -0,0 +1,237 @@
+import numpy as np
+import cv2
+from PIL import Image
+from .utils import str2bool, str2int
+from ctypes import c_float
+from .Runner_base import Runner_base, Param_base
+
+class General(Param_base):
+    type = 'bilinear'
+    keep_ratio = True
+    zoom = True
+    calculate_ratio_using_CSim = True
+    resize_w = 0
+    resize_h = 0
+    resized_w = 0
+    resized_h = 0
+    def update(self, **dic):
+        self.type = dic['type']
+        self.keep_ratio = str2bool(dic['keep_ratio'])
+        self.zoom = str2bool(dic['zoom'])
+        self.calculate_ratio_using_CSim = str2bool(dic['calculate_ratio_using_CSim'])
+        self.resize_w = str2int(dic['resize_w'])
+        self.resize_h = str2int(dic['resize_h'])
+
+    def __str__(self):
+        str_out = [
+            ', type:',str(self.type),
+            ', keep_ratio:',str(self.keep_ratio),
+            ', zoom:',str(self.zoom),
+            ', calculate_ratio_using_CSim:',str(self.calculate_ratio_using_CSim),
+            ', resize_w:',str(self.resize_w),
+            ', resize_h:',str(self.resize_h),
+            ', resized_w:',str(self.resized_w),
+            ', resized_h:',str(self.resized_h)]
+        return(' '.join(str_out))
+
+class Hw(Param_base):
+    resize_bit = 12
+    def update(self, **dic):
+        pass
+
+    def __str__(self):
+        str_out = [
+            ', resize_bit:',str(self.resize_bit)]
+        return(' '.join(str_out))
+
+class runner(Runner_base):
+    ## overwrite the class in Runner_base
+    general = General()
+    hw = Hw()
+
+    def __str__(self):
+        return('<Resize>')
+
+    def update(self, **kwargs):
+        super().update(**kwargs)
+        
+        ## if resize size has not been assigned, then it will take model size as resize size
+        if self.general.resize_w == 0 or self.general.resize_h == 0:
+            self.general.resize_w = self.common.model_size[0]
+            self.general.resize_h = self.common.model_size[1]
+        assert(self.general.resize_w > 0)
+        assert(self.general.resize_h > 0)
+
+        ##
+        if self.common.numerical_type == '520':
+            self.general.type = 'fixed_520'
+        elif self.common.numerical_type == '720':
+            self.general.type = 'fixed_720'
+        assert(self.general.type.lower() in ['BILINEAR',  'Bilinear',  'bilinear', 'BICUBIC',  'Bicubic',  'bicubic', 'FIXED',  'Fixed', 'fixed', 'FIXED_520',  'Fixed_520',  'fixed_520', 'FIXED_720', 'Fixed_720', 'fixed_720','CV', 'cv', 'opencv', 'OpenCV', 'CV2', 'cv2'])
+
+
+    def run(self, image_data):
+        ## init
+        ori_w = image_data.shape[1]
+        ori_h = image_data.shape[0]
+        info = {}
+
+        ##
+        if self.general.keep_ratio:
+            self.general.resized_w, self.general.resized_h = self.calcuate_scale_keep_ratio(self.general.resize_w,self.general.resize_h, ori_w, ori_h, self.general.calculate_ratio_using_CSim)
+        else:
+            self.general.resized_w = int(self.general.resize_w)
+            self.general.resized_h = int(self.general.resize_h)
+        assert(self.general.resized_w > 0)
+        assert(self.general.resized_h > 0)
+
+        ##
+        if (self.general.resized_w > ori_w) or (self.general.resized_h > ori_h):
+            if not self.general.zoom: 
+                info['size'] = (ori_w,ori_h)
+                if str2bool(self.common.print_info):
+                    print('no resize')
+                    self.print_info()
+                return image_data, info
+
+        ## resize
+        if self.general.type.lower() in ['BILINEAR',  'Bilinear',  'bilinear']:
+            image_data = self.do_resize_bilinear(image_data, self.general.resized_w, self.general.resized_h)
+        elif self.general.type.lower() in ['BICUBIC',  'Bicubic',  'bicubic']:
+            image_data = self.do_resize_bicubic(image_data, self.general.resized_w, self.general.resized_h)
+        elif self.general.type.lower() in ['CV',  'cv',  'opencv', 'OpenCV',  'CV2',  'cv2']:
+            image_data = self.do_resize_cv2(image_data, self.general.resized_w, self.general.resized_h)
+        elif self.general.type.lower() in ['FIXED',  'Fixed',  'fixed', 'FIXED_520',  'Fixed_520',  'fixed_520', 'FIXED_720', 'Fixed_720', 'fixed_720']:
+            image_data = self.do_resize_fixed(image_data, self.general.resized_w, self.general.resized_h, self.hw.resize_bit, self.general.type)
+
+       
+        # output
+        info['size'] = (self.general.resized_w, self.general.resized_h)
+
+        # print info
+        if str2bool(self.common.print_info):
+            self.print_info()
+
+        return image_data, info
+
+    def calcuate_scale_keep_ratio(self, tar_w, tar_h, ori_w, ori_h, calculate_ratio_using_CSim):
+        if not calculate_ratio_using_CSim:
+            scale_w = tar_w * 1.0 / ori_w*1.0
+            scale_h = tar_h * 1.0 / ori_h*1.0
+            scale = scale_w if scale_w < scale_h else scale_h
+            new_w = int(round(ori_w * scale))
+            new_h = int(round(ori_h * scale))
+            return new_w, new_h
+        
+        ## calculate_ratio_using_CSim
+        scale_w = c_float(tar_w * 1.0 / (ori_w * 1.0)).value
+        scale_h = c_float(tar_h * 1.0 / (ori_h * 1.0)).value
+        scale_ratio = 0.0
+        scale_target_w = 0
+        scale_target_h = 0
+        padH = 0
+        padW = 0
+
+        bScaleW = True if scale_w < scale_h else False
+        if bScaleW:
+            scale_ratio = scale_w
+            scale_target_w = int(c_float(scale_ratio * ori_w + 0.5).value)
+            scale_target_h = int(c_float(scale_ratio * ori_h + 0.5).value)
+            assert (abs(scale_target_w - tar_w) <= 1), "Error: scale down width cannot meet expectation\n"
+            padH = tar_h - scale_target_h
+            padW = 0
+            assert (padH >= 0), "Error: padH shouldn't be less than zero\n"
+        else:
+            scale_ratio = scale_h 
+            scale_target_w = int(c_float(scale_ratio * ori_w + 0.5).value)
+            scale_target_h = int(c_float(scale_ratio * ori_h + 0.5).value)
+            assert (abs(scale_target_h - tar_h) <= 1), "Error: scale down height cannot meet expectation\n"
+            padW = tar_w - scale_target_w
+            padH = 0
+            assert (padW >= 0), "Error: padW shouldn't be less than zero\n"
+        new_w = tar_w - padW
+        new_h = tar_h - padH
+        return new_w, new_h
+    
+    def do_resize_bilinear(self, image_data, resized_w, resized_h):
+        img = Image.fromarray(image_data)
+        img = img.resize((resized_w, resized_h), Image.BILINEAR)
+        image_data = np.array(img).astype('uint8')
+        return image_data        
+
+    def do_resize_bicubic(self, image_data, resized_w, resized_h):
+        img = Image.fromarray(image_data)
+        img = img.resize((resized_w, resized_h), Image.BICUBIC)
+        image_data = np.array(img).astype('uint8')
+        return image_data
+
+    def do_resize_cv2(self, image_data, resized_w, resized_h):
+        image_data = cv2.resize(image_data, (resized_w, resized_h))
+        image_data = np.array(image_data)
+        # image_data = np.array(image_data).astype('uint8')
+        return image_data
+
+    def do_resize_fixed(self, image_data, resized_w, resized_h, resize_bit, type):
+        if len(image_data.shape) < 3:
+            m, n = image_data.shape
+            tmp = np.zeros((m,n,3), dtype=np.uint8)
+            tmp[:,:,0] = image_data
+            image_data = tmp
+            c = 3
+            gray = True
+        else:
+            m, n, c = image_data.shape
+            gray = False
+
+        resolution = 1 << resize_bit
+
+        # Width
+        ratio = int(((n - 1) << resize_bit) / (resized_w - 1))
+        ratio_cnt = 0
+        src_x = 0
+        resized_image_w = np.zeros((m, resized_w, c), dtype=np.uint8)
+        
+        for dst_x in range(resized_w):
+            while ratio_cnt > resolution:
+                ratio_cnt = ratio_cnt - resolution
+                src_x = src_x + 1
+            mul1 = np.ones((m, c)) * (resolution - ratio_cnt)
+            mul2 = np.ones((m, c)) * ratio_cnt
+            resized_image_w[:, dst_x, :] = np.multiply(np.multiply(
+                image_data[:, src_x, :], mul1) + np.multiply(image_data[:, src_x + 1, :], mul2), 1/resolution)
+            ratio_cnt = ratio_cnt + ratio
+
+        # Height
+        ratio = int(((m - 1) << resize_bit) / (resized_h - 1))
+        ## NPU HW special case 2 , only on 520
+        if type.lower() in ['FIXED_520',  'Fixed_520',  'fixed_520']:
+            if (((ratio * (resized_h - 1)) % 4096 == 0) and ratio != 4096):
+                ratio -= 1
+
+        ratio_cnt = 0
+        src_x = 0
+        resized_image = np.zeros(
+            (resized_h, resized_w, c), dtype=np.uint8)
+        for dst_x in range(resized_h):
+            while ratio_cnt > resolution:
+                ratio_cnt = ratio_cnt - resolution
+                src_x = src_x + 1
+                       
+            mul1 = np.ones((resized_w, c)) * (resolution - ratio_cnt)
+            mul2 = np.ones((resized_w, c)) * ratio_cnt
+            
+            ## NPU HW special case 1 , both on 520 / 720
+            if (((dst_x > 0) and ratio_cnt == resolution) and (ratio != resolution)):
+                if type.lower() in ['FIXED_520',  'Fixed_520',  'fixed_520','FIXED_720',  'Fixed_720',  'fixed_720' ]:
+                    resized_image[dst_x, :, :] = np.multiply(np.multiply(
+                        resized_image_w[src_x+1, :, :], mul1) + np.multiply(resized_image_w[src_x + 2, :, :], mul2), 1/resolution)
+            else:
+                resized_image[dst_x, :, :] = np.multiply(np.multiply(
+                    resized_image_w[src_x, :, :], mul1) + np.multiply(resized_image_w[src_x + 1, :, :], mul2), 1/resolution)
+
+            ratio_cnt = ratio_cnt + ratio
+
+        if gray:
+            resized_image = resized_image[:,:,0]
+
+        return resized_image
--- a/kneron/preprocessing/funcs/Rotate.py
+++ b/kneron/preprocessing/funcs/Rotate.py
@ -0,0 +1,45 @@
+import numpy as np
+from .utils import str2bool, str2int
+
+class runner(object):
+    def __init__(self, *args, **kwargs):
+        self.set = {
+            'operator': '',
+            "rotate_direction": 0,
+
+        }
+        self.update(*args, **kwargs)
+
+    def update(self, *args, **kwargs):
+        self.set.update(kwargs)
+        self.rotate_direction = str2int(self.set['rotate_direction'])
+
+        # print info
+        if str2bool(self.set['b_print']):
+            self.print_info()
+
+    def print_info(self):
+        print("<rotate>",
+            'rotate_direction', self.rotate_direction,)
+
+
+    def run(self, image_data):
+        image_data = self._rotate(image_data)
+        return image_data
+
+    def _rotate(self,img):
+        if self.rotate_direction == 1 or self.rotate_direction == 2:
+            col, row, unit = img.shape
+            pInBuf = img.reshape((-1,1))
+            pOutBufTemp = np.zeros((col* row* unit))
+            for r in range(row):
+                for c in range(col):
+                    for u in range(unit):
+                        if self.rotate_direction == 1:
+                            pOutBufTemp[unit * (c * row + (row - r - 1))+u] = pInBuf[unit * (r * col + c)+u]
+                        elif self.rotate_direction == 2:
+                            pOutBufTemp[unit * (row * (col - c - 1) + r)+u] = pInBuf[unit * (r * col + c)+u]
+
+            img = pOutBufTemp.reshape((col,row,unit))
+
+        return img
--- a/kneron/preprocessing/funcs/Runner_base.py
+++ b/kneron/preprocessing/funcs/Runner_base.py
@ -0,0 +1,59 @@
+from abc import ABCMeta, abstractmethod
+
+class Param_base(object):
+    @abstractmethod
+    def update(self,**dic):
+        raise NotImplementedError("Must override")
+
+    def load_dic(self, key, **dic):
+        if key in dic:
+            param = eval('self.'+key)
+            param = dic[key]
+
+    def __str__(self):
+        str_out = []
+        return(' '.join(str_out))
+  
+
+class Common(Param_base):
+    print_info = False
+    model_size = [0,0]
+    numerical_type = 'floating'
+
+    def update(self, **dic):
+        self.print_info = dic['print_info']
+        self.model_size = dic['model_size']
+        self.numerical_type = dic['numerical_type']
+    
+    def __str__(self):
+        str_out = ['numerical_type:',str(self.numerical_type)]
+        return(' '.join(str_out))
+    
+class Runner_base(metaclass=ABCMeta):
+    common = Common()
+    general = Param_base()
+    floating = Param_base()
+    hw = Param_base()
+
+    def update(self, **kwargs):
+        ## update param
+        self.common.update(**kwargs['common'])
+        self.general.update(**kwargs['general'])
+        assert(self.common.numerical_type.lower() in ['floating', '520', '720'])
+        if (self.common.numerical_type == 'floating'):
+            if (self.floating.__class__.__name__ != 'Param_base'):
+                self.floating.update(**kwargs['floating'])
+        else:
+            if (self.hw.__class__.__name__ != 'Param_base'):
+                self.hw.update(**kwargs['hw'])
+
+    def print_info(self):
+        if (self.common.numerical_type == 'floating'):
+            print(self, self.common, self.general, self.floating)
+        else:
+            print(self, self.common, self.general, self.hw)
+        
+
+
+        
+
--- a/kneron/preprocessing/funcs/init.py
+++ b/kneron/preprocessing/funcs/init.py
@ -0,0 +1,2 @@
+from . import ColorConversion, Padding, Resize, Crop, Normalize, Rotate
+
--- a/kneron/preprocessing/funcs/utils.py
+++ b/kneron/preprocessing/funcs/utils.py
@ -0,0 +1,372 @@
+import numpy as np
+from PIL import Image
+import struct
+
+def pad_square_to_4(x_start, x_end, y_start, y_end):
+    w_int = x_end - x_start 
+    h_int = y_end - y_start
+    pad = w_int - h_int
+    if pad > 0:
+        pad_s = (pad >> 1) &(~3)
+        pad_e = pad - pad_s
+        y_start -= pad_s
+        y_end += pad_e
+    else:#//pad <=0
+        pad_s = -(((pad) >> 1) &(~3))
+        pad_e = (-pad) - pad_s
+        x_start -= pad_s
+        x_end += pad_e
+    return x_start, x_end, y_start, y_end
+
+def str_fill(value):
+    if len(value) == 1:
+        value = "0" + value
+    elif len(value) == 0:
+        value = "00"
+
+    return value
+
+def clip_ary(value):
+    list_v = []
+    for i in range(len(value)):
+        v = value[i] % 256
+        list_v.append(v)
+
+    return list_v
+    
+def str2bool(v):
+    if isinstance(v,bool):
+        return v
+    return v.lower() in ('TRUE', 'True', 'true', '1', 'T', 't', 'Y', 'YES', 'y', 'yes')
+
+
+def str2int(s):
+    if s == "":
+        s = 0
+    s = int(s)
+    return s
+
+def str2float(s):
+    if s == "":
+        s = 0
+    s = float(s)
+    return s
+
+def clip(value, mini, maxi):
+    if value < mini:
+        result = mini
+    elif value > maxi:
+        result = maxi
+    else:
+        result = value
+
+    return result
+
+
+def clip_ary(value):
+    list_v = []
+    for i in range(len(value)):
+        v = value[i] % 256
+        list_v.append(v)
+
+    return list_v
+
+
+def signed_rounding(value, bit):
+    if value < 0:
+        value = value - (1 << (bit - 1))
+    else:
+        value = value + (1 << (bit - 1))
+
+    return value
+
+def hex_loader(data_folder,**kwargs):
+    format_mode = kwargs['raw_img_fmt']
+    src_h = kwargs['img_in_height']
+    src_w = kwargs['img_in_width']
+
+    if format_mode in ['YUV444', 'yuv444', 'YCBCR444', 'YCbCr444', 'ycbcr444']:
+        output = hex_yuv444(data_folder,src_h,src_w)
+    elif format_mode in ['RGB565', 'rgb565']:
+        output = hex_rgb565(data_folder,src_h,src_w)
+    elif format_mode in ['YUV422', 'yuv422', 'YCBCR422', 'YCbCr422', 'ycbcr422']:
+        output = hex_yuv422(data_folder,src_h,src_w)
+
+    return output
+
+def hex_rgb565(hex_folder,src_h,src_w):
+    pix_per_line = 8
+    byte_per_line = 16
+
+    f = open(hex_folder)
+    pixel_r = []
+    pixel_g = []
+    pixel_b = []
+
+    # Ignore the first line
+    f.readline()
+    input_line = int((src_h * src_w)/pix_per_line)
+    for i in range(input_line):
+        readline = f.readline()
+        for j in range(int(byte_per_line/2)-1, -1, -1):
+            data1 = int(readline[(j * 4 + 0):(j * 4 + 2)], 16)
+            data0 = int(readline[(j * 4 + 2):(j * 4 + 4)], 16)
+            r = ((data1 & 0xf8) >> 3)
+            g = (((data0 & 0xe0) >> 5) + ((data1 & 0x7) << 3))
+            b = (data0 & 0x1f)
+            pixel_r.append(r)
+            pixel_g.append(g)
+            pixel_b.append(b)
+
+    ary_r = np.array(pixel_r, dtype=np.uint8)
+    ary_g = np.array(pixel_g, dtype=np.uint8)
+    ary_b = np.array(pixel_b, dtype=np.uint8)
+    output = np.concatenate((ary_r[:, None], ary_g[:, None], ary_b[:, None]), axis=1)
+    output = output.reshape((src_h, src_w, 3))
+
+    return output
+
+def hex_yuv444(hex_folder,src_h,src_w):
+    pix_per_line = 4
+    byte_per_line = 16
+
+    f = open(hex_folder)
+    byte0 = []
+    byte1 = []
+    byte2 = []
+    byte3 = []
+
+    # Ignore the first line
+    f.readline()
+    input_line = int((src_h * src_w)/pix_per_line)
+    for i in range(input_line):
+        readline = f.readline()
+        for j in range(byte_per_line-1, -1, -1):
+            data = int(readline[(j*2):(j*2+2)], 16)
+            if (j+1) % 4 == 0:
+                byte0.append(data)
+            elif (j+2) % 4 == 0:
+                byte1.append(data)
+            elif (j+3) % 4 == 0:
+                byte2.append(data)
+            elif (j+4) % 4 == 0:
+                byte3.append(data)
+    # ary_a = np.array(byte0, dtype=np.uint8)
+    ary_v = np.array(byte1, dtype=np.uint8)
+    ary_u = np.array(byte2, dtype=np.uint8)
+    ary_y = np.array(byte3, dtype=np.uint8)
+    output = np.concatenate((ary_y[:, None], ary_u[:, None], ary_v[:, None]), axis=1)
+    output = output.reshape((src_h, src_w, 3))
+
+    return output
+
+def hex_yuv422(hex_folder,src_h,src_w):
+    pix_per_line = 8
+    byte_per_line = 16
+    f = open(hex_folder)
+    pixel_y = []
+    pixel_u = []
+    pixel_v = []
+
+    # Ignore the first line
+    f.readline()
+    input_line = int((src_h * src_w)/pix_per_line)
+    for i in range(input_line):
+        readline = f.readline()
+        for j in range(int(byte_per_line/4)-1, -1, -1):
+            data3 = int(readline[(j * 8 + 0):(j * 8 + 2)], 16)
+            data2 = int(readline[(j * 8 + 2):(j * 8 + 4)], 16)
+            data1 = int(readline[(j * 8 + 4):(j * 8 + 6)], 16)
+            data0 = int(readline[(j * 8 + 6):(j * 8 + 8)], 16)
+            pixel_y.append(data3)
+            pixel_y.append(data1)
+            pixel_u.append(data2)
+            pixel_u.append(data2)
+            pixel_v.append(data0)
+            pixel_v.append(data0)
+
+    ary_y = np.array(pixel_y, dtype=np.uint8)
+    ary_u = np.array(pixel_u, dtype=np.uint8)
+    ary_v = np.array(pixel_v, dtype=np.uint8)
+    output = np.concatenate((ary_y[:, None], ary_u[:, None], ary_v[:, None]), axis=1)
+    output = output.reshape((src_h, src_w, 3))
+
+    return output
+
+def bin_loader(data_folder,**kwargs):
+    format_mode = kwargs['raw_img_fmt']
+    src_h = kwargs['img_in_height']
+    src_w = kwargs['img_in_width']
+    if format_mode in ['YUV','yuv','YUV444', 'yuv444', 'YCBCR','YCbCr','ycbcr','YCBCR444', 'YCbCr444', 'ycbcr444']:
+        output = bin_yuv444(data_folder,src_h,src_w)
+    elif format_mode in ['RGB565', 'rgb565']:
+        output = bin_rgb565(data_folder,src_h,src_w)
+    elif format_mode in ['NIR', 'nir','NIR888', 'nir888']:
+        output = bin_nir(data_folder,src_h,src_w)
+    elif format_mode in ['YUV422', 'yuv422', 'YCBCR422', 'YCbCr422', 'ycbcr422']:
+        output = bin_yuv422(data_folder,src_h,src_w)
+    elif format_mode in ['RGB888','rgb888']:
+        output = np.fromfile(data_folder, dtype='uint8')
+        output = output.reshape(src_h,src_w,3)
+    elif format_mode in ['RGBA8888','rgba8888', 'RGBA' , 'rgba']:
+        output_temp = np.fromfile(data_folder, dtype='uint8')
+        output_temp = output_temp.reshape(src_h,src_w,4)
+        output = output_temp[:,:,0:3]
+
+    return output
+
+def bin_yuv444(in_img_path,src_h,src_w):
+    # load bin
+    struct_fmt = '1B' 
+    struct_len = struct.calcsize(struct_fmt)
+    struct_unpack = struct.Struct(struct_fmt).unpack_from
+    
+    row = src_h
+    col = src_w
+    pixels = row*col
+
+    raw = []
+    with open(in_img_path, "rb") as f:
+        while True:
+            data = f.read(struct_len)
+            if not data: break
+            s = struct_unpack(data)
+            raw.append(s[0])
+    
+
+    raw = raw[:pixels*4]
+
+    #
+    output = np.zeros((pixels * 3), dtype=np.uint8)
+    cnt = 0
+    for i in range(0, pixels*4, 4):
+        #Y
+        output[cnt] = raw[i+3]
+        #U
+        cnt += 1
+        output[cnt] = raw[i+2]
+        #V
+        cnt += 1
+        output[cnt] = raw[i+1]
+
+        cnt += 1          
+
+    output = output.reshape((src_h,src_w,3))
+    return output
+    
+def bin_yuv422(in_img_path,src_h,src_w):
+    # load bin
+    struct_fmt = '1B' 
+    struct_len = struct.calcsize(struct_fmt)
+    struct_unpack = struct.Struct(struct_fmt).unpack_from
+    
+    row = src_h
+    col = src_w
+    pixels = row*col
+
+    raw = []
+    with open(in_img_path, "rb") as f:
+        while True:
+            data = f.read(struct_len)
+            if not data: break
+            s = struct_unpack(data)
+            raw.append(s[0])
+    
+
+    raw = raw[:pixels*2]
+
+    #
+    output = np.zeros((pixels * 3), dtype=np.uint8)
+    cnt = 0
+    for i in range(0, pixels*2, 4):
+        #Y0
+        output[cnt] = raw[i+3]
+        #U0
+        cnt += 1
+        output[cnt] = raw[i+2]
+        #V0
+        cnt += 1
+        output[cnt] = raw[i]
+        #Y1
+        cnt += 1
+        output[cnt] = raw[i+1]
+        #U1
+        cnt += 1
+        output[cnt] = raw[i+2]
+        #V1
+        cnt += 1
+        output[cnt] = raw[i]
+
+        cnt += 1          
+
+    output = output.reshape((src_h,src_w,3))
+    return output
+
+def bin_rgb565(in_img_path,src_h,src_w):
+    # load bin
+    struct_fmt = '1B' 
+    struct_len = struct.calcsize(struct_fmt)
+    struct_unpack = struct.Struct(struct_fmt).unpack_from
+    
+    row = src_h
+    col = src_w
+    pixels = row*col
+
+    rgba565 = []
+    with open(in_img_path, "rb") as f:
+        while True:
+            data = f.read(struct_len)
+            if not data: break
+            s = struct_unpack(data)
+            rgba565.append(s[0])
+    
+
+    rgba565 = rgba565[:pixels*2]
+
+    # rgb565_bin to numpy_array
+    output = np.zeros((pixels * 3), dtype=np.uint8)
+    cnt = 0
+    for i in range(0, pixels*2, 2):
+        temp = rgba565[i]
+        temp2 = rgba565[i+1]
+        #R-5
+        output[cnt] = (temp2 >>3)
+        
+        #G-6
+        cnt += 1
+        output[cnt] = ((temp & 0xe0) >> 5) + ((temp2 & 0x07) << 3)
+        
+        #B-5
+        cnt += 1
+        output[cnt] = (temp & 0x1f)
+
+        cnt += 1          
+
+    output = output.reshape((src_h,src_w,3))
+    return output
+
+def bin_nir(in_img_path,src_h,src_w):
+    # load bin
+    struct_fmt = '1B' 
+    struct_len = struct.calcsize(struct_fmt)
+    struct_unpack = struct.Struct(struct_fmt).unpack_from
+
+    nir = []
+    with open(in_img_path, "rb") as f:
+        while True:
+            data = f.read(struct_len)
+            if not data: break
+            s = struct_unpack(data)
+            nir.append(s[0])
+            
+    nir = nir[:src_h*src_w]
+    pixels = len(nir)
+    # nir_bin to numpy_array
+    output = np.zeros((len(nir) * 3), dtype=np.uint8)
+    for i in range(0, pixels):
+        output[i*3]=nir[i]
+        output[i*3+1]=nir[i]
+        output[i*3+2]=nir[i]
+
+    output = output.reshape((src_h,src_w,3))
+    return output
--- a/kneron/preprocessing/funcs/utils_520.py
+++ b/kneron/preprocessing/funcs/utils_520.py
@ -0,0 +1,50 @@
+import math
+
+def round_up_16(num):
+    return ((num + (16 - 1)) & ~(16 - 1))
+
+def round_up_n(num, n):
+    if (num > 0):
+        temp = float(num) / n
+        return math.ceil(temp) * n
+    else:
+        return -math.ceil(float(-num) / n) * n
+
+def cal_img_row_offset(crop_num, pad_num, start_row, out_row, orig_row):
+
+    scaled_img_row = int(out_row - (pad_num[1] + pad_num[3]))
+    if ((start_row - pad_num[1]) > 0):
+        img_str_row = int((start_row - pad_num[1]))
+    else:
+        img_str_row = 0
+    valid_row = int(orig_row - (crop_num[1] + crop_num[3]))
+    img_str_row = int(valid_row * img_str_row / scaled_img_row)
+    return int(img_str_row + crop_num[1])
+
+def get_pad_num(pad_num_orig, left, up, right, bottom):
+    pad_num = [0]*4
+    for i in range(0,4):
+        pad_num[i] = pad_num_orig[i]
+
+    if not (left):
+        pad_num[0] = 0
+    if not (up):
+        pad_num[1] = 0
+    if not (right):
+        pad_num[2] = 0
+    if not (bottom):
+        pad_num[3] = 0
+
+    return pad_num
+
+def get_byte_per_pixel(raw_fmt):
+    if raw_fmt.lower() in ['RGB888', 'rgb888', 'RGB', 'rgb888']:
+        return 4
+    elif raw_fmt.lower() in ['YUV', 'yuv', 'YUV422', 'yuv422']:
+        return 2
+    elif raw_fmt.lower() in ['RGB565', 'rgb565']:
+        return 2
+    elif raw_fmt.lower() in ['NIR888', 'nir888', 'NIR', 'nir']:
+        return 1
+    else:
+        return -1
--- a/kneron/preprocessing/funcs/utils_720.py
+++ b/kneron/preprocessing/funcs/utils_720.py
@ -0,0 +1,42 @@
+import numpy as np
+from PIL import Image
+
+def twos_complement(value):
+    value = int(value)
+    # msb = (value & 0x8000) * (1/np.power(2, 15))
+    msb = (value & 0x8000) >> 15
+    if msb == 1:
+        if (((~value) & 0xFFFF) + 1) >= 0xFFFF:
+            result = ((~value) & 0xFFFF)
+        else:
+            result = (((~value) & 0xFFFF) + 1)
+        result = result * (-1)
+    else:
+        result = value
+
+    return result
+
+
+def twos_complement_pix(value):
+    h, _ = value.shape
+    for i in range(h):
+        value[i, 0] = twos_complement(value[i, 0])
+
+    return value
+
+def clip(value, mini, maxi):
+    if value < mini:
+        result = mini
+    elif value > maxi:
+        result = maxi
+    else:
+        result = value
+
+    return result
+
+def clip_pix(value, mini, maxi):
+    h, _ = value.shape
+    for i in range(h):
+        value[i, 0] = clip(value[i, 0], mini, maxi)
+
+    return value
--- a/kneron/quantize_yolov5.py
+++ b/kneron/quantize_yolov5.py
@ -0,0 +1,66 @@
+import os
+import numpy as np
+import torch
+import ktc  # Kneron Toolchain
+import onnx
+from yolov5_preprocess import Yolov5_preprocess
+import kneron_preprocessing
+
+# Select the device: CUDA when available, otherwise CPU
+device = torch.device("cuda" if torch.cuda.is_available() else "cpu")
+
+# Image size (height, width) handed to the preprocessing function
+imgsz_h, imgsz_w = 640, 640
+
+# Directory that holds the images
+data_path = "/workspace/yolov5/data50"
+
+# Make sure data50 contains images (searched recursively, by file extension)
+files_found = [f for _, _, files in os.walk(data_path) for f in files if f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp"))]
+
+if not files_found:
+    raise FileNotFoundError(f"❌ Error: No images found in {data_path}! Please check your dataset.")
+
+print(f"✅ Found {len(files_found)} images in {data_path}")
+
+# Get the input name of the ONNX model
+onnx_model_path = "/workspace/yolov5/runs/train/exp24/weights/latest.opt.onnx"
+m = onnx.load(onnx_model_path)
+input_name = m.graph.input[0].name  # the dict key passed to analysis() must match the ONNX input name
+
+km = ktc.ModelConfig(20008, "0001", "720", onnx_model=onnx_model_path)
+# Preprocessed image data, one array per image
+img_list = []
+
+# Walk data50 and preprocess every image
+for root, _, files in os.walk(data_path):
+    for f in files:
+        fullpath = os.path.join(root, f)
+
+        # Only image files are processed
+        if not f.lower().endswith((".jpg", ".jpeg", ".png", ".bmp")):
+            print(f"⚠️ Skipping non-image file: {fullpath}")
+            continue
+
+        # Try to preprocess the image; a failure is reported and the file skipped
+        try:
+            img_data, _ = Yolov5_preprocess(fullpath, device, imgsz_h, imgsz_w)
+            img_data = img_data.cpu().numpy()
+            print(f"✅ Processed: {fullpath}")
+            img_list.append(img_data)
+        except Exception as e:
+            print(f"❌ Failed to process {fullpath}: {e}")
+
+# Make sure img_list is not empty
+if not img_list:
+    raise ValueError("❌ Error: No valid images were processed! Please check the image paths and formats.")
+
+# Run the BIE quantization
+bie_model_path = km.analysis({input_name: img_list})
+
+# Check that the BIE model was generated
+if not os.path.exists(bie_model_path):
+    raise RuntimeError(f"❌ Error: BIE model was not generated! Please check your quantization process.")
+
+# Report success
+print("\n✅ Fixed-point analysis done! BIE model saved to:", bie_model_path)
--- a/kneron/removenode.py
+++ b/kneron/removenode.py
@ -0,0 +1,34 @@
+import onnx
+import ktc.onnx_optimizer as kneron_opt
+from onnx import helper
+
+def replace_sigmoid_with_identity(model):
+    """
+    Replaces all Sigmoid nodes with Identity nodes to maintain model integrity.
+    """
+    for node in model.graph.node:
+        if node.op_type == "Sigmoid":
+            print(f"Replacing {node.name} with Identity")
+            identity_node = helper.make_node(
+                "Identity",
+                inputs=node.input,
+                outputs=node.output,
+                name=node.name + "_identity"
+            )
+            model.graph.node.extend([identity_node])
+            model.graph.node.remove(node)
+
+    return model
+
+def process_onnx(input_onnx_path, output_onnx_path):
+    """ Replaces Sigmoid with Identity and saves the new model """
+    model = onnx.load(input_onnx_path)
+    model = replace_sigmoid_with_identity(model)
+    onnx.save(model, output_onnx_path)
+    print(f"Modified ONNX model saved to: {output_onnx_path}")
+
+# Paths as mounted inside the Docker container.
+# NOTE: the conversion below runs at import time; there is no __main__ guard.
+input_onnx = "/workspace/yolov5/runs/train/exp24/weights/best_simplified.onnx"
+output_onnx = "/workspace/yolov5/runs/train/exp24/weights/best_no_sigmoid.onnx"
+
+process_onnx(input_onnx, output_onnx)
--- a/kneron/yolov5_export.py
+++ b/kneron/yolov5_export.py
@ -0,0 +1,80 @@
+import os
+import torch
+import sys
+import yaml
+import argparse
+
+from yolov5_runner import Yolov5Runner
+
+def save_weight(num_classes): 
+    current_path=os.getcwd()
+    par_path = os.path.dirname(current_path)
+    sys.path.append(os.path.join(par_path, 'yolov5'))
+    from models.yolo import Model  
+    num_classes = num_classes 
+    device=torch.device('cpu')
+    ckpt = torch.load(path, map_location=device)
+    model = Model(yaml_path, nc=num_classes)
+    ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items() if k in model.state_dict() and model.state_dict()[k].shape == v.shape}
+    model.load_state_dict(ckpt['model'])
+    torch.save(model.state_dict(),pt_path,_use_new_zipfile_serialization=False)
+    
+def export_onnx(input_h, input_w, num_classes):
+
+    onnx_batch_size, onnx_img_h, onnx_img_w = 1, input_h, input_w
+    yolov5_model = Yolov5Runner(model_path=pt_path, yaml_path=yaml_path, grid20_path=grid20_path, grid40_path=grid40_path, grid80_path=grid80_path, num_classes=num_classes, imgsz_h=onnx_img_h, imgsz_w=onnx_img_w, conf_thres=0.001, iou_thres=0.65, top_k_num=3000, vanish_point=0.0) 
+    
+    # Input
+    img = torch.zeros((onnx_batch_size, 3, onnx_img_h, onnx_img_w))  
+    # img = img.type(torch.cuda.FloatTensor)
+
+    # Load PyTorch model
+    model = yolov5_model.yolov5_model
+    model.eval()
+    model.model[-1].export = True  # set Detect() layer export=True
+    y = model(img)  # dry run
+
+    # ONNX export
+    try:
+        import onnx
+        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+        print('****onnx file****',onnx_export_file)
+        torch.onnx.export(model, img, onnx_export_file, verbose=False, opset_version=11, keep_initializers_as_inputs=True, input_names=['images'], output_names=['classes', 'boxes'] if y is None else ['output'])
+        # Checks
+        onnx_model = onnx.load(onnx_export_file)  # load onnx model
+        onnx.checker.check_model(onnx_model)  # check onnx model
+        print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
+        print('ONNX export success, saved as %s' % onnx_export_file)
+    except Exception as e:
+        print('ONNX export failure: %s' % e)
+
+
+# Script entry point: read the paths/sizes yaml, then re-save the weights and
+# export the ONNX model. The names assigned below are module-level globals
+# that save_weight() and export_onnx() read directly.
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data', type=str, default='../yolov5/data/pretrained_paths_520.yaml', help='the path to pretrained model paths yaml file')
+
+    args = parser.parse_args()
+    
+    # NOTE(review): FullLoader is not the restricted loader; yaml.safe_load
+    # would be the safer choice if this file could come from an untrusted source.
+    with open(args.data) as f:
+        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
+        
+    os.environ["CUDA_VISIBLE_DEVICES"] = '0'
+    num_classes = data_dict['nc']
+    input_w = data_dict['input_w']
+    input_h = data_dict['input_h']
+    # grid_dir is read (so the key must exist) but not used in this file
+    grid_dir = data_dict['grid_dir']
+    grid20_path = data_dict['grid20_path']
+    grid40_path = data_dict['grid40_path']
+    grid80_path = data_dict['grid80_path']
+    # path: source checkpoint; pt_path: where save_weight() writes the state dict
+    path = data_dict['path']
+    pt_path=data_dict['pt_path']
+    yaml_path=data_dict['yaml_path']
+    onnx_export_file = data_dict['onnx_export_file']
+    save_weight(num_classes)
+    export_onnx(input_h, input_w, num_classes)
+
+
+
+
+
+
--- a/kneron/yolov5_preprocess.py
+++ b/kneron/yolov5_preprocess.py
@ -0,0 +1,161 @@
+# coding: utf-8
+import torch
+import cv2
+import numpy as np
+import math
+import time
+import kneron_preprocessing
+
+# Import-time configuration (runs once when this module is loaded).
+# Select the "520" defaults of the kneron_preprocessing API - presumably the KL520 target; confirm.
+kneron_preprocessing.API.set_default_as_520()
+# Ask cuDNN to use deterministic algorithms.
+torch.backends.cudnn.deterministic = True
+# Image file extensions recognised by this module.
+img_formats = ['.bmp', '.jpg', '.jpeg', '.png', '.tif', '.tiff', '.dng']
+def make_divisible(x, divisor):
+    # Round x up to the nearest multiple of divisor
+    multiples = math.ceil(x / divisor)  # how many divisors are needed to cover x
+    return multiples * divisor
+
+def check_img_size(img_size, s=32):
+    # Return img_size rounded up to a multiple of stride s; print a warning when it had to change
+    adjusted = make_divisible(img_size, int(s))  # smallest stride multiple >= img_size
+    if adjusted == img_size:
+        return adjusted
+    print('WARNING: --img-size %g must be multiple of max stride %g, updating to %g' % (img_size, s, adjusted))
+    return adjusted
+
+def letterbox_ori(img, new_shape=(640, 640), color=(0, 0, 0), auto=True, scaleFill=False, scaleup=True):
+    # OpenCV letterbox: resize img keeping its aspect ratio, then pad it on all four sides to new_shape.
+    # new_shape is (height, width) or a single int for a square target.
+    # auto and scaleFill are accepted but not used by this implementation.
+    # Returns (padded image, (r, r) scale ratio, (dw, dh) padding per side).
+    shape = img.shape[:2]  # source [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Largest uniform scale that fits the source inside the target
+    scale = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:
+        scale = min(scale, 1.0)  # shrink only, never enlarge
+
+    # Resized (width, height) before padding
+    new_unpad = int(round(shape[1] * scale)), int(round(shape[0] * scale))
+    # Leftover space on each axis, split evenly between its two sides
+    pad_w = (new_shape[1] - new_unpad[0]) / 2
+    pad_h = (new_shape[0] - new_unpad[1]) / 2
+
+    if shape[::-1] != new_unpad:
+        img = cv2.resize(img, new_unpad, interpolation=cv2.INTER_LINEAR)
+
+    # The -0.1 / +0.1 nudges send an odd leftover pixel to the bottom / right edge
+    top = int(round(pad_h - 0.1))
+    bottom = int(round(pad_h + 0.1))
+    left = int(round(pad_w - 0.1))
+    right = int(round(pad_w + 0.1))
+    img = cv2.copyMakeBorder(img, top, bottom, left, right, cv2.BORDER_CONSTANT, value=color)
+
+    return img, (scale, scale), (pad_w, pad_h)
+
+def letterbox(img, new_shape=(640, 640), color=(0, 0, 0), auto=True, scaleFill=False, scaleup=True):
+    # Kneron letterbox: resize img keeping its aspect ratio, then pad only the right and bottom
+    # edges up to new_shape, using the kneron_preprocessing resize/pad routines.
+    # new_shape is (height, width) or a single int for a square target.
+    # color, auto and scaleFill are accepted but not used; the pad call is given a constant 0.
+    # Returns (padded image, (r, r) scale ratio, (dw, dh) total padding added per axis).
+    shape = img.shape[:2]  # source [height, width]
+    if isinstance(new_shape, int):
+        new_shape = (new_shape, new_shape)
+
+    # Largest uniform scale that fits the source inside the target
+    scale = min(new_shape[0] / shape[0], new_shape[1] / shape[1])
+    if not scaleup:
+        scale = min(scale, 1.0)  # shrink only, never enlarge
+
+    # Resized (width, height) before padding
+    new_unpad = int(round(shape[1] * scale)), int(round(shape[0] * scale))
+    # Whole leftover goes to one side of each axis, so it is not halved here
+    pad_w = new_shape[1] - new_unpad[0]
+    pad_h = new_shape[0] - new_unpad[1]
+
+    if shape[::-1] != new_unpad:
+        img = kneron_preprocessing.API.resize(img,size=new_unpad, keep_ratio = False)
+
+    # Image stays anchored at the top-left corner
+    top, left = int(0), int(0)
+    bottom = int(round(pad_h + 0.1))
+    right = int(round(pad_w + 0.1))
+    img = kneron_preprocessing.API.pad(img, left, right, top, bottom, 0)
+
+    return img, (scale, scale), (pad_w, pad_h)
+
+def letterbox_test(img, new_shape=(640, 640), color=(0, 0, 0), auto=True, scaleFill=False, scaleup=True):
+    # Test variant: bilinear resize to the fixed size (480, 256) without keeping the aspect ratio.
+    # Every argument except img is ignored; the reported ratio is (1.0, 1.0) and the padding (0, 0).
+    resized = kneron_preprocessing.API.resize(img, size=(480, 256), keep_ratio=False, type='bilinear')
+    return resized, (1.0, 1.0), (0, 0)
+
+def LoadImages(path,img_size):  #_rgb # for inference
+    # Load one image for inference and letterbox it to img_size.
+    #   path:     file path (str) read with cv2.imread, or an already decoded BGR array
+    #   img_size: target shape forwarded to letterbox() as new_shape
+    # Returns (img, img0): img is the letterboxed image as a contiguous channel-first RGB array,
+    # img0 is the original BGR image.
+    # Raises FileNotFoundError when a path is given but the image cannot be read.
+    if isinstance(path, str):
+        img0 = cv2.imread(path)  # BGR
+        if img0 is None:
+            # imread signals failure by returning None; fail here with a clear message
+            # instead of an AttributeError later inside letterbox()
+            raise FileNotFoundError('Image Not Found ' + path)
+    else:
+        img0 = path  # BGR
+
+    # Padded resize
+    img = letterbox(img0, new_shape=img_size)[0]
+    # Convert
+    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
+    img = np.ascontiguousarray(img)
+    return img, img0
+
+def LoadImages_yyy(path,img_size): #_yyy # for inference
+    # Load one image for inference as luma-only data: the Y channel of the YCrCb conversion is
+    # replicated into three identical channels, then letterboxed to img_size.
+    #   path:     file path (str) read with cv2.imread, or an already decoded BGR array
+    #   img_size: target shape forwarded to letterbox() as new_shape
+    # Returns (img, img0): img is the letterboxed image as a contiguous channel-first array,
+    # img0 is the 3-channel luma image (not the original colour image).
+    # Raises FileNotFoundError when a path is given but the image cannot be read.
+    if isinstance(path, str):
+        img0 = cv2.imread(path)  # BGR
+        if img0 is None:
+            # imread signals failure by returning None; fail here with a clear message
+            raise FileNotFoundError('Image Not Found ' + path)
+    else:
+        img0 = path  # BGR
+
+    ycrcb = cv2.cvtColor(img0, cv2.COLOR_BGR2YCrCb)
+    y = cv2.split(ycrcb)[0]  # keep luma only; the chroma planes are not needed
+    img0 = np.stack((y,)*3, axis=-1)
+
+    # Padded resize
+    img = letterbox(img0, new_shape=img_size)[0]
+
+    # Convert
+    img = img[:, :, ::-1].transpose(2, 0, 1)  # reverse channel order, HWC to CHW
+    img = np.ascontiguousarray(img)
+    return img, img0
+
+def LoadImages_yuv420(path,img_size):  #_yuv420 # for inference 
+    # Load one image for inference after a BGR -> YUV I420 -> BGR round trip, then letterbox it
+    # to img_size.
+    #   path:     file path (str) read with cv2.imread, or an already decoded BGR array
+    #   img_size: target shape forwarded to letterbox() as new_shape
+    # Returns (img, img0): img is the letterboxed image as a contiguous channel-first RGB array,
+    # img0 is the round-tripped BGR image (cropped to even width and height).
+    # Raises FileNotFoundError when a path is given but the image cannot be read.
+    if isinstance(path, str):
+        img0 = cv2.imread(path)  # BGR
+        if img0 is None:
+            # imread signals failure by returning None; fail here with a clear message
+            raise FileNotFoundError('Image Not Found ' + path)
+    else:
+        img0 = path  # BGR
+    # Crop to even height/width before the I420 conversion
+    img_h, img_w = img0.shape[:2]
+    img_h = (img_h // 2) * 2
+    img_w = (img_w // 2) * 2
+    img = img0[:img_h,:img_w,:]
+    yuv = cv2.cvtColor(img, cv2.COLOR_BGR2YUV_I420)
+    img0= cv2.cvtColor(yuv, cv2.COLOR_YUV2BGR_I420) #yuv420
+
+    # Padded resize
+    img = letterbox(img0, new_shape=img_size)[0]
+
+    # Convert
+    img = img[:, :, ::-1].transpose(2, 0, 1)  # BGR to RGB, HWC to CHW
+    img = np.ascontiguousarray(img)
+    return img, img0
+
+def Yolov5_preprocess(image_path, device, imgsz_h, imgsz_w) : 
+    # Full preprocessing for one image: round the requested size up to a stride multiple,
+    # load + letterbox the image, normalise it with the Kneron API and move it to `device`.
+    # Returns (tensor with a leading batch axis, original image as returned by LoadImages).
+    max_stride = 32
+    target_h = check_img_size(imgsz_h, s=max_stride)
+    target_w = check_img_size(imgsz_w, s=max_stride)
+
+    chw_img, im0 = LoadImages(image_path, img_size=(target_h,target_w))
+    normed = kneron_preprocessing.API.norm(chw_img)  # path1: normalisation done by the Kneron API
+    # path2 (disabled in the original): cast to float and divide by 255.0 instead
+    tensor = torch.from_numpy(normed).to(device)
+
+    # Add the batch axis when a single CHW image was produced
+    if tensor.dim() == 3:
+        tensor = tensor.unsqueeze(0)
+
+    return tensor, im0
+
--- a/kneron/yolov5_savingWeight.py
+++ b/kneron/yolov5_savingWeight.py
@ -0,0 +1,42 @@
+import os
+import torch
+import sys
+import argparse
+import yaml
+
+def save_weight(num_classes): 
+    # Rebuild the model from its YAML definition, copy the matching checkpoint weights into it
+    # and save the resulting state_dict in the legacy (non-zipfile) serialization format.
+    #   num_classes: number of classes passed to Model(..., nc=num_classes)
+    # Reads the module-level names `path` (checkpoint), `yaml_path` (model definition) and
+    # `pt_path` (output file), which are assigned in this file's __main__ block.
+    # NOTE(review): torch.load unpickles the checkpoint; only load files from trusted sources.
+    from models.yolo import Model  
+    device=torch.device('cpu')
+    ckpt = torch.load(path, map_location=device)
+    model = Model(yaml_path, nc=num_classes)
+    # Build the target state_dict once instead of twice per checkpoint key
+    target_state = model.state_dict()
+    # Keep only checkpoint tensors whose name and shape match the freshly built model
+    ckpt['model'] = {k: v for k, v in ckpt['model'].float().state_dict().items()
+                     if k in target_state and target_state[k].shape == v.shape}
+    # Default (strict) loading: raises if any model key was filtered out above
+    model.load_state_dict(ckpt['model'])
+    torch.save(model.state_dict(),pt_path,_use_new_zipfile_serialization=False)
+
+
+# Script entry point: read the model settings from a YAML file and run save_weight().
+if __name__ == '__main__':  
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--data', type=str, default='data/pretrained_paths_520.yaml', help='the path to pretrained model paths yaml file')
+    args = parser.parse_args()
+    
+    # NOTE(review): FullLoader can construct more than plain data types; only point --data at trusted files.
+    with open(args.data) as f:
+        data_dict = yaml.load(f, Loader=yaml.FullLoader)  # data dict
+        
+    os.environ["CUDA_VISIBLE_DEVICES"] = '0'  # expose only GPU 0 to this process
+    # These assignments create module-level names. save_weight() reads path, pt_path and
+    # yaml_path as globals, so those three names must not be renamed. The input_* and grid_*
+    # values are not used by the code in this file.
+    input_w = data_dict['input_w']
+    input_h = data_dict['input_h']
+    grid_dir = data_dict['grid_dir']
+    grid20_path = data_dict['grid20_path']
+    grid40_path = data_dict['grid40_path']
+    grid80_path = data_dict['grid80_path']
+    path = data_dict['path']
+    pt_path=data_dict['pt_path']
+    yaml_path=data_dict['yaml_path']
+
+    save_weight(data_dict['nc']) 
+
+
+
+
+
--- a/models/init.py
+++ b/models/init.py
--- a/models/common.py
+++ b/models/common.py
@ -0,0 +1,246 @@
+# This file contains modules common to various models
+
+import math
+
+import numpy as np
+import torch
+import torch.nn as nn
+import torch.nn.functional as F
+from PIL import Image, ImageDraw
+
+from utils.datasets import letterbox
+from utils.general import non_max_suppression, make_divisible, scale_coords, xyxy2xywh
+from utils.plots import color_list
+
+
+def autopad(k, p=None):  # kernel, padding
+    # Return p unchanged when given; otherwise derive 'same'-style padding as half the kernel
+    # size (one value for an int kernel, a list of per-dimension values otherwise)
+    if p is not None:
+        return p
+    if isinstance(k, int):
+        return k // 2
+    return [dim // 2 for dim in k]
+
+
+def DWConv(c1, c2, k=1, s=1, act=True):
+    # Depthwise-style convolution: a Conv whose group count is gcd(c1, c2)
+    groups = math.gcd(c1, c2)
+    return Conv(c1, c2, k, s, g=groups, act=act)
+
+
+class Conv(nn.Module):
+    # Standard convolution block: bias-free Conv2d -> BatchNorm2d -> LeakyReLU(0.1) (or Identity)
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super(Conv, self).__init__()
+        padding = autopad(k, p)
+        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)
+        self.bn = nn.BatchNorm2d(c2)
+        # LeakyReLU is used here in place of Hardswish
+        if act:
+            self.act = nn.LeakyReLU(0.1, inplace=True)
+        else:
+            self.act = nn.Identity()
+
+    def forward(self, x):
+        # conv -> batch norm -> activation
+        out = self.conv(x)
+        out = self.bn(out)
+        return self.act(out)
+
+    def fuseforward(self, x):
+        # Forward pass without the batch-norm layer: conv -> activation
+        out = self.conv(x)
+        return self.act(out)
+
+
+class Bottleneck(nn.Module):
+    # Standard bottleneck: 1x1 Conv to c2*e hidden channels, 3x3 Conv to c2, optional residual add
+    def __init__(self, c1, c2, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, shortcut, groups, expansion
+        super(Bottleneck, self).__init__()
+        hidden = int(c2 * e)
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = Conv(hidden, c2, 3, 1, g=g)
+        # The residual connection is only possible when input and output widths match
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        out = self.cv2(self.cv1(x))
+        if self.add:
+            return x + out
+        return out
+
+
+class BottleneckCSP(nn.Module):
+    # CSP Bottleneck https://github.com/WongKinYiu/CrossStagePartialNetworks
+    # Two branches (a stack of n Bottlenecks and a plain 1x1 conv) are concatenated,
+    # batch-normalised, activated and fused by a final 1x1 Conv.
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(BottleneckCSP, self).__init__()
+        hidden = int(c2 * e)
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
+        self.cv4 = Conv(2 * hidden, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+        blocks = [Bottleneck(hidden, hidden, shortcut, g, e=1.0) for _ in range(n)]
+        self.m = nn.Sequential(*blocks)
+
+    def forward(self, x):
+        deep = self.cv3(self.m(self.cv1(x)))  # bottleneck branch
+        skip = self.cv2(x)  # shortcut branch
+        merged = torch.cat((deep, skip), dim=1)
+        return self.cv4(self.act(self.bn(merged)))
+
+
+
+
+class Focus(nn.Module):
+    # Focus spatial information into channel space: the last two axes are sampled at stride 2
+    # with four different offsets and the four slices are stacked along dim 1 before a Conv
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1, act=True):  # ch_in, ch_out, kernel, stride, padding, groups
+        super(Focus, self).__init__()
+        self.conv = Conv(c1 * 4, c2, k, s, p, g, act)
+
+    def forward(self, x):  # 4x the channels, half the size on each of the last two axes
+        even_even = x[..., ::2, ::2]
+        odd_even = x[..., 1::2, ::2]
+        even_odd = x[..., ::2, 1::2]
+        odd_odd = x[..., 1::2, 1::2]
+        stacked = torch.cat([even_even, odd_even, even_odd, odd_odd], 1)
+        return self.conv(stacked)
+
+
+class Concat(nn.Module):
+    # Concatenate a list of tensors along a fixed dimension
+    def __init__(self, dimension=1):
+        super(Concat, self).__init__()
+        self.d = dimension  # axis passed to torch.cat
+
+    def forward(self, x):
+        # x is the sequence of tensors to join
+        return torch.cat(x, self.d)
+
+
+class NMS(nn.Module):
+    # Module wrapper that runs non_max_suppression on the first element of its input
+    conf = 0.25  # confidence threshold
+    iou = 0.45  # IoU threshold
+    classes = None  # (optional list) filter by class
+
+    def __init__(self):
+        super(NMS, self).__init__()
+
+    def forward(self, x):
+        raw = x[0]  # only the first entry of the model output is suppressed
+        return non_max_suppression(raw, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)
+
+
+class autoShape(nn.Module):
+    # input-robust model wrapper for passing cv2/np/PIL/torch inputs. Includes preprocessing, inference and NMS
+    # NOTE(review): forward() reads self.stride and self.names, which are not assigned anywhere in
+    # this class; presumably the caller copies them over from the wrapped model - confirm.
+    img_size = 640  # inference size (pixels)
+    conf = 0.25  # NMS confidence threshold
+    iou = 0.45  # NMS IoU threshold
+    classes = None  # (optional list) filter by class
+
+    def __init__(self, model):
+        # Wrap `model`, switching it to eval mode
+        super(autoShape, self).__init__()
+        self.model = model.eval()
+
+    def forward(self, imgs, size=640, augment=False, profile=False):
+        # Tensor input is passed straight to the model and its raw output is returned.
+        # Any other input is letterboxed, batched, run through the model and NMS, rescaled to
+        # the original image sizes and returned as a Detections object.
+        # supports inference from various sources. For height=720, width=1280, RGB images example inputs are:
+        #   opencv:     imgs = cv2.imread('image.jpg')[:,:,::-1]  # HWC BGR to RGB x(720,1280,3)
+        #   PIL:        imgs = Image.open('image.jpg')  # HWC x(720,1280,3)
+        #   numpy:      imgs = np.zeros((720,1280,3))  # HWC
+        #   torch:      imgs = torch.zeros(16,3,720,1280)  # BCHW
+        #   multiple:   imgs = [Image.open('image1.jpg'), Image.open('image2.jpg'), ...]  # list of images
+
+        p = next(self.model.parameters())  # for device and type
+        if isinstance(imgs, torch.Tensor):  # torch
+            return self.model(imgs.to(p.device).type_as(p), augment, profile)  # inference
+
+        # Pre-process
+        if not isinstance(imgs, list):
+            imgs = [imgs]
+        shape0, shape1 = [], []  # image and inference shapes
+        batch = range(len(imgs))  # batch size
+        for i in batch:
+            # NOTE: a list passed by the caller is modified in place (entries become numpy arrays)
+            imgs[i] = np.array(imgs[i])  # to numpy
+            imgs[i] = imgs[i][:, :, :3] if imgs[i].ndim == 3 else np.tile(imgs[i][:, :, None], 3)  # enforce 3ch input
+            s = imgs[i].shape[:2]  # HWC
+            shape0.append(s)  # image shape
+            g = (size / max(s))  # gain
+            shape1.append([y * g for y in s])
+        # One common inference shape for the batch: per-axis maximum, rounded up to a stride multiple
+        shape1 = [make_divisible(x, int(self.stride.max())) for x in np.stack(shape1, 0).max(0)]  # inference shape
+        x = [letterbox(imgs[i], new_shape=shape1, auto=False)[0] for i in batch]  # pad
+        # batch[-1] is 0 (falsy) for a single image, which takes the x[0][None] branch
+        x = np.stack(x, 0) if batch[-1] else x[0][None]  # stack
+        x = np.ascontiguousarray(x.transpose((0, 3, 1, 2)))  # BHWC to BCHW
+        x = torch.from_numpy(x).to(p.device).type_as(p) / 255.  # uint8 to fp16/32
+
+        # Inference
+        with torch.no_grad():
+            y = self.model(x, augment, profile)[0]  # forward
+        y = non_max_suppression(y, conf_thres=self.conf, iou_thres=self.iou, classes=self.classes)  # NMS
+
+        # Post-process
+        for i in batch:
+            if y[i] is not None:
+                # Map boxes from the inference shape back to each original image shape
+                y[i][:, :4] = scale_coords(shape1, y[i][:, :4], shape0[i])
+
+        return Detections(imgs, y, self.names)
+
+
+class Detections:
+    # Container for YOLOv5 inference results: keeps the images, the per-image predictions and
+    # derived box formats (xyxy / xywh, in pixels and normalised by image width/height)
+    def __init__(self, imgs, pred, names=None):
+        #   imgs:  list of images as numpy arrays
+        #   pred:  list of per-image tensors, pred[0] = (xyxy, conf, cls)
+        #   names: class names, indexed by class id in display()
+        super(Detections, self).__init__()
+        self.imgs = imgs  # list of images as numpy arrays
+        self.pred = pred  # list of tensors pred[0] = (xyxy, conf, cls)
+        self.names = names  # class names
+        self.xyxy = pred  # xyxy pixels
+        self.xywh = [xyxy2xywh(x) for x in pred]  # xywh pixels
+        # Per-image divisor (w, h, w, h, 1, 1): scales the four box values, leaves conf/cls untouched
+        gn = [torch.Tensor([*[im.shape[i] for i in [1, 0, 1, 0]], 1., 1.]) for im in imgs]  # normalization gains
+        self.xyxyn = [x / g for x, g in zip(self.xyxy, gn)]  # xyxy normalized
+        self.xywhn = [x / g for x, g in zip(self.xywh, gn)]  # xywh normalized
+
+    def display(self, pprint=False, show=False, save=False):
+        # Print a per-image summary (pprint), open each annotated image (show) and/or write it
+        # to 'results{i}.jpg' in the current directory (save)
+        colors = color_list()
+        for i, (img, pred) in enumerate(zip(self.imgs, self.pred)):
+            summary = f'Image {i + 1}/{len(self.pred)}: {img.shape[0]}x{img.shape[1]} '
+            if pred is not None:
+                for c in pred[:, -1].unique():
+                    n = (pred[:, -1] == c).sum()  # detections per class
+                    summary += f'{n} {self.names[int(c)]}s, '  # add to string
+            if show or save:
+                # Convert to PIL even when there are no detections, so that save()/show()
+                # below are never called on a raw numpy array
+                img = Image.fromarray(img.astype(np.uint8)) if isinstance(img, np.ndarray) else img  # from np
+                if pred is not None:
+                    for *box, conf, cls in pred:  # xyxy, confidence, class
+                        ImageDraw.Draw(img).rectangle(box, width=4, outline=colors[int(cls) % 10])  # plot
+            if save:
+                f = f'results{i}.jpg'
+                summary += f"saved to '{f}'"
+                img.save(f)  # save
+            if show:
+                img.show(f'Image {i}')  # show
+            if pprint:
+                print(summary)
+
+    def print(self):
+        self.display(pprint=True)  # print results
+
+    def show(self):
+        self.display(show=True)  # show results
+
+    def save(self):
+        self.display(save=True)  # save results
+
+
+class Flatten(nn.Module):
+    # Collapse every axis after the first into one; use after nn.AdaptiveAvgPool2d(1)
+    @staticmethod
+    def forward(x):
+        batch = x.size(0)
+        return x.view(batch, -1)
+
+
+class Classify(nn.Module):
+    # Classification head, i.e. x(b,c1,20,20) to x(b,c2): global average pool -> conv -> flatten
+    def __init__(self, c1, c2, k=1, s=1, p=None, g=1):  # ch_in, ch_out, kernel, stride, padding, groups
+        super(Classify, self).__init__()
+        self.aap = nn.AdaptiveAvgPool2d(1)  # to x(b,c1,1,1)
+        padding = autopad(k, p)
+        self.conv = nn.Conv2d(c1, c2, k, s, padding, groups=g, bias=False)  # to x(b,c2,1,1)
+        self.flat = Flatten()
+
+    def forward(self, x):
+        # Accept either one feature map or a list of them; pooled maps are joined on dim 1
+        feature_maps = x if isinstance(x, list) else [x]
+        pooled = [self.aap(fm) for fm in feature_maps]
+        z = torch.cat(pooled, 1)
+        return self.flat(self.conv(z))  # flatten to x(b,c2)
+
+
+class SPP(nn.Module):
+    # Spatial pyramid pooling layer used in YOLOv3-SPP: the reduced input is concatenated with
+    # stride-1 max-pooled copies of itself (one per kernel size in k) and fused by a 1x1 Conv
+    def __init__(self, c1, c2, k=(5, 9, 13)):
+        super(SPP, self).__init__()
+        hidden = c1 // 2
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = Conv(hidden * (len(k) + 1), c2, 1, 1)
+        pools = [nn.MaxPool2d(kernel_size=size, stride=1, padding=size // 2) for size in k]
+        self.m = nn.ModuleList(pools)
+
+    def forward(self, x):
+        reduced = self.cv1(x)
+        pooled = [pool(reduced) for pool in self.m]
+        return self.cv2(torch.cat([reduced] + pooled, 1))
--- a/models/experimental.py
+++ b/models/experimental.py
@ -0,0 +1,152 @@
+# This file contains experimental modules
+
+import numpy as np
+import torch
+import torch.nn as nn
+
+from models.common import Conv, DWConv
+from utils.google_utils import attempt_download
+
+
+class CrossConv(nn.Module):
+    # Cross Convolution Downsample: a (1, k) Conv followed by a (k, 1) Conv, optional residual add
+    def __init__(self, c1, c2, k=3, s=1, g=1, e=1.0, shortcut=False):
+        # ch_in, ch_out, kernel, stride, groups, expansion, shortcut
+        super(CrossConv, self).__init__()
+        hidden = int(c2 * e)
+        self.cv1 = Conv(c1, hidden, (1, k), (1, s))
+        self.cv2 = Conv(hidden, c2, (k, 1), (s, 1), g=g)
+        # The residual connection is only possible when input and output widths match
+        self.add = shortcut and c1 == c2
+
+    def forward(self, x):
+        out = self.cv2(self.cv1(x))
+        if self.add:
+            return x + out
+        return out
+
+
+class C3(nn.Module):
+    # Cross Convolution CSP: same layout as a CSP bottleneck, with CrossConv blocks in the deep branch
+    def __init__(self, c1, c2, n=1, shortcut=True, g=1, e=0.5):  # ch_in, ch_out, number, shortcut, groups, expansion
+        super(C3, self).__init__()
+        hidden = int(c2 * e)
+        self.cv1 = Conv(c1, hidden, 1, 1)
+        self.cv2 = nn.Conv2d(c1, hidden, 1, 1, bias=False)
+        self.cv3 = nn.Conv2d(hidden, hidden, 1, 1, bias=False)
+        self.cv4 = Conv(2 * hidden, c2, 1, 1)
+        self.bn = nn.BatchNorm2d(2 * hidden)  # applied to cat(cv2, cv3)
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+        blocks = [CrossConv(hidden, hidden, 3, 1, g, 1.0, shortcut) for _ in range(n)]
+        self.m = nn.Sequential(*blocks)
+
+    def forward(self, x):
+        deep = self.cv3(self.m(self.cv1(x)))  # CrossConv branch
+        skip = self.cv2(x)  # shortcut branch
+        merged = torch.cat((deep, skip), dim=1)
+        return self.cv4(self.act(self.bn(merged)))
+
+
+class Sum(nn.Module):
+    # Weighted sum of 2 or more layers https://arxiv.org/abs/1911.09070
+    # The first input is always added unweighted; with weight=True every further input is
+    # scaled by 2 * sigmoid(w[i]) using a learnable parameter vector w
+    def __init__(self, n, weight=False):  # n: number of inputs
+        super(Sum, self).__init__()
+        self.weight = weight  # apply weights boolean
+        self.iter = range(n - 1)  # indices of the inputs after the first
+        if weight:
+            self.w = nn.Parameter(-torch.arange(1., n) / 2, requires_grad=True)  # layer weights
+
+    def forward(self, x):
+        total = x[0]  # no weight
+        if not self.weight:
+            for idx in self.iter:
+                total = total + x[idx + 1]
+            return total
+        scale = torch.sigmoid(self.w) * 2
+        for idx in self.iter:
+            total = total + x[idx + 1] * scale[idx]
+        return total
+
+
+class GhostConv(nn.Module):
+    # Ghost Convolution https://github.com/huawei-noah/ghostnet
+    # Half of the output channels come from a regular Conv, the other half from a 5x5 Conv
+    # with one group per channel applied to that first half
+    def __init__(self, c1, c2, k=1, s=1, g=1, act=True):  # ch_in, ch_out, kernel, stride, groups
+        super(GhostConv, self).__init__()
+        half = c2 // 2
+        self.cv1 = Conv(c1, half, k, s, None, g, act)
+        self.cv2 = Conv(half, half, 5, 1, None, half, act)
+
+    def forward(self, x):
+        primary = self.cv1(x)
+        ghost = self.cv2(primary)
+        return torch.cat([primary, ghost], 1)
+
+
+class GhostBottleneck(nn.Module):
+    # Ghost Bottleneck https://github.com/huawei-noah/ghostnet
+    # Main path: GhostConv -> (DWConv only when s == 2) -> linear GhostConv.
+    # Shortcut: DWConv + 1x1 Conv when s == 2, otherwise the identity.
+    def __init__(self, c1, c2, k, s):
+        super(GhostBottleneck, self).__init__()
+        half = c2 // 2
+        downsample = s == 2
+        self.conv = nn.Sequential(
+            GhostConv(c1, half, 1, 1),  # pw
+            DWConv(half, half, k, s, act=False) if downsample else nn.Identity(),  # dw
+            GhostConv(half, c2, 1, 1, act=False),  # pw-linear
+        )
+        if downsample:
+            self.shortcut = nn.Sequential(DWConv(c1, c1, k, s, act=False),
+                                          Conv(c1, c2, 1, 1, act=False))
+        else:
+            self.shortcut = nn.Identity()
+
+    def forward(self, x):
+        main = self.conv(x)
+        return main + self.shortcut(x)
+
+
+class MixConv2d(nn.Module):
+    # Mixed Depthwise Conv https://arxiv.org/abs/1907.09595
+    # One Conv2d per kernel size in k; their outputs are concatenated on dim 1, normalised,
+    # activated and added to the input
+    def __init__(self, c1, c2, k=(1, 3), s=1, equal_ch=True):
+        super(MixConv2d, self).__init__()
+        groups = len(k)
+        if equal_ch:  # equal c_ per group
+            slot = torch.linspace(0, groups - 1E-6, c2).floor()  # group index for each of the c2 channels
+            c_ = [(slot == g).sum() for g in range(groups)]  # intermediate channels
+        else:  # equal weight.numel() per group
+            b = [c2] + [0] * groups
+            a = np.eye(groups + 1, groups, k=-1)
+            a -= np.roll(a, 1, axis=1)
+            a *= np.array(k) ** 2
+            a[0] = 1
+            c_ = np.linalg.lstsq(a, b, rcond=None)[0].round()  # solve for equal weight indices, ax = b
+
+        branches = []
+        for g in range(groups):
+            kernel = k[g]
+            branches.append(nn.Conv2d(c1, int(c_[g]), kernel, s, kernel // 2, bias=False))
+        self.m = nn.ModuleList(branches)
+        self.bn = nn.BatchNorm2d(c2)
+        self.act = nn.LeakyReLU(0.1, inplace=True)
+
+    def forward(self, x):
+        mixed = torch.cat([branch(x) for branch in self.m], 1)
+        return x + self.act(self.bn(mixed))
+
+
+class Ensemble(nn.ModuleList):
+    # Ensemble of models: runs every member on the same input and averages their first outputs
+    def __init__(self):
+        super(Ensemble, self).__init__()
+
+    def forward(self, x, augment=False):
+        outputs = [member(x, augment)[0] for member in self]
+        # alternatives: torch.stack(outputs).max(0)[0] (max ensemble), torch.cat(outputs, 1) (nms ensemble)
+        averaged = torch.stack(outputs).mean(0)  # mean ensemble
+        return averaged, None  # inference, train output
+
+
+def attempt_load(weights, map_location=None):
+    # Loads an ensemble of models weights=[a,b,c] or a single model weights=[a] or weights=a
+    # Returns the single model when one checkpoint is given, otherwise the Ensemble itself
+    # (with 'names' and 'stride' copied from its last member).
+    # NOTE(review): torch.load unpickles the checkpoint file; only load weights from trusted sources.
+    model = Ensemble()
+    for w in weights if isinstance(weights, list) else [weights]:
+        ckpt = torch.load(w, map_location=map_location)
+        # The checkpoint stores the model object under 'model'; cast to FP32, fuse, switch to eval
+        model.append( ckpt['model'].float().fuse().eval() )  # load FP32 model
+
+    # Compatibility updates
+    for m in model.modules():
+        if type(m) in [nn.Hardswish, nn.LeakyReLU, nn.ReLU, nn.ReLU6]:
+            m.inplace = True  # pytorch 1.7.0 compatibility
+        elif type(m) is Conv:
+            m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+
+    if len(model) == 1:
+        return model[-1]  # return model
+    else:
+        print('Ensemble created with %s\n' % weights)
+        # Expose the last member's class names and strides on the ensemble object
+        for k in ['names', 'stride']:
+            setattr(model, k, getattr(model[-1], k))
+        return model  # return ensemble
--- a/models/export.py
+++ b/models/export.py
@ -0,0 +1,94 @@
+"""Exports a YOLOv5 *.pt model to ONNX and TorchScript formats
+
+Usage:
+    $ export PYTHONPATH="$PWD" && python models/export.py --weights ./weights/yolov5s.pt --img 640 --batch 1
+"""
+
+import argparse
+import sys
+import time
+
+sys.path.append('./')  # to run '$ python *.py' files in subdirectories
+
+import torch
+import torch.nn as nn
+
+import models
+from models.experimental import attempt_load
+from utils.activations import Hardswish
+from utils.general import set_logging, check_img_size
+
+# Script entry point: load a checkpoint, run one dry forward pass, then attempt TorchScript,
+# ONNX and CoreML exports in turn. Each export is wrapped in its own try/except, so a failure
+# is printed and the remaining exports are still attempted.
+if __name__ == '__main__':
+    parser = argparse.ArgumentParser()
+    parser.add_argument('--weights', type=str, default='./yolov5s.pt', help='weights path')  # from yolov5/models/
+    parser.add_argument('--img-size', nargs='+', type=int, default=[640, 640], help='image size')  # height, width
+    parser.add_argument('--batch-size', type=int, default=1, help='batch size')
+    opt = parser.parse_args()
+    # A single --img-size value is duplicated into [size, size]
+    opt.img_size *= 2 if len(opt.img_size) == 1 else 1  # expand
+    print(opt)
+    set_logging()
+    t = time.time()
+
+    # Load PyTorch model
+    model = attempt_load(opt.weights, map_location=torch.device('cpu'))  # load FP32 model
+    labels = model.names
+
+    # Checks
+    gs = int(max(model.stride))  # grid size (max stride)
+    opt.img_size = [check_img_size(x, gs) for x in opt.img_size]  # verify img_size are gs-multiples
+
+    # Input
+    img = torch.zeros(opt.batch_size, 3, *opt.img_size)  # image size(1,3,320,192) iDetection
+
+    # Update model
+    for k, m in model.named_modules():
+        m._non_persistent_buffers_set = set()  # pytorch 1.6.0 compatibility
+        # Swap nn.Hardswish for the Hardswish class imported from utils.activations
+        if isinstance(m, models.common.Conv) and isinstance(m.act, nn.Hardswish):
+            m.act = Hardswish()  # assign activation
+        # if isinstance(m, models.yolo.Detect):
+        #     m.forward = m.forward_export  # assign forward (optional)
+    model.model[-1].export = True  # set Detect() layer export=True
+    y = model(img)  # dry run
+
+    # TorchScript export
+    try:
+        print('\nStarting TorchScript export with torch %s...' % torch.__version__)
+        # NOTE(review): str.replace substitutes every '.pt' occurrence in the path, not only the suffix
+        f = opt.weights.replace('.pt', '.torchscript.pt')  # filename
+        ts = torch.jit.trace(model, img)
+        ts.save(f)
+        print('TorchScript export success, saved as %s' % f)
+    except Exception as e:
+        print('TorchScript export failure: %s' % e)
+
+    # ONNX export
+    try:
+        import onnx
+
+        print('\nStarting ONNX export with onnx %s...' % onnx.__version__)
+        f = opt.weights.replace('.pt', '.onnx')  # filename
+        torch.onnx.export(model, img, f, verbose=False, opset_version=12, input_names=['images'],
+                          output_names=['classes', 'boxes'] if y is None else ['output'])
+
+        # Checks
+        onnx_model = onnx.load(f)  # load onnx model
+        onnx.checker.check_model(onnx_model)  # check onnx model
+        # print(onnx.helper.printable_graph(onnx_model.graph))  # print a human readable model
+        print('ONNX export success, saved as %s' % f)
+    except Exception as e:
+        print('ONNX export failure: %s' % e)
+
+    # CoreML export
+    try:
+        import coremltools as ct
+
+        print('\nStarting CoreML export with coremltools %s...' % ct.__version__)
+        # convert model from torchscript and apply pixel scaling as per detect.py
+        # NOTE(review): `ts` only exists if the TorchScript trace above succeeded; otherwise this
+        # step fails with a NameError that is reported as a CoreML export failure.
+        model = ct.convert(ts, inputs=[ct.ImageType(name='image', shape=img.shape, scale=1 / 255.0, bias=[0, 0, 0])])
+        f = opt.weights.replace('.pt', '.mlmodel')  # filename
+        model.save(f)
+        print('CoreML export success, saved as %s' % f)
+    except Exception as e:
+        print('CoreML export failure: %s' % e)
+
+    # Finish
+    print('\nExport complete (%.2fs). Visualize with https://github.com/lutzroeder/netron.' % (time.time() - t))
--- a/Show More
+++ b/Show More
				`@ -0,0 +1,2 @@`
				`from . import ColorConversion, Padding, Resize, Crop, Normalize, Rotate`