embree: Update to 4.3.1

This commit is contained in:
Jakub Mateusz Marcowski 2024-02-24 12:40:55 +01:00 committed by Jakub Marcowski
parent d2f9245ddc
commit c43eab55a4
No known key found for this signature in database
GPG Key ID: 10D9E07CFFBC0E6F
219 changed files with 12630 additions and 6085 deletions

View File

@ -15,10 +15,10 @@ if env["builtin_embree"]:
embree_src = [
"common/sys/sysinfo.cpp",
"common/sys/alloc.cpp",
"common/sys/estring.cpp",
"common/sys/filename.cpp",
"common/sys/library.cpp",
"common/sys/thread.cpp",
"common/sys/string.cpp",
"common/sys/regression.cpp",
"common/sys/mutex.cpp",
"common/sys/condition.cpp",
@ -36,6 +36,7 @@ if env["builtin_embree"]:
"kernels/common/rtcore.cpp",
"kernels/common/rtcore_builder.cpp",
"kernels/common/scene.cpp",
"kernels/common/scene_verify.cpp",
"kernels/common/alloc.cpp",
"kernels/common/geometry.cpp",
"kernels/common/scene_triangle_mesh.cpp",
@ -56,8 +57,6 @@ if env["builtin_embree"]:
"kernels/bvh/bvh_builder_twolevel.cpp",
"kernels/bvh/bvh_intersector1_bvh4.cpp",
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
"kernels/bvh/bvh_intersector_stream_bvh4.cpp",
"kernels/bvh/bvh_intersector_stream_filters.cpp",
]
thirdparty_sources = [thirdparty_dir + file for file in embree_src]

View File

@ -1,6 +1,13 @@
import glob, os, shutil, subprocess, re
import glob
import os
import re
import shutil
import stat
import subprocess
from types import TracebackType
from typing import Any, Callable, Tuple, Type
git_tag = "v3.13.5"
git_tag = "v4.3.1"
include_dirs = [
"common/tasking",
@ -15,7 +22,7 @@ include_dirs = [
"common/simd",
"common/simd/arm",
"common/simd/wasm",
"include/embree3",
"include/embree4",
"kernels/subdiv",
"kernels/geometry",
]
@ -23,10 +30,10 @@ include_dirs = [
cpp_files = [
"common/sys/sysinfo.cpp",
"common/sys/alloc.cpp",
"common/sys/estring.cpp",
"common/sys/filename.cpp",
"common/sys/library.cpp",
"common/sys/thread.cpp",
"common/sys/string.cpp",
"common/sys/regression.cpp",
"common/sys/mutex.cpp",
"common/sys/condition.cpp",
@ -44,6 +51,7 @@ cpp_files = [
"kernels/common/rtcore.cpp",
"kernels/common/rtcore_builder.cpp",
"kernels/common/scene.cpp",
"kernels/common/scene_verify.cpp",
"kernels/common/alloc.cpp",
"kernels/common/geometry.cpp",
"kernels/common/scene_triangle_mesh.cpp",
@ -65,26 +73,58 @@ cpp_files = [
"kernels/bvh/bvh_intersector1.cpp",
"kernels/bvh/bvh_intersector1_bvh4.cpp",
"kernels/bvh/bvh_intersector_hybrid4_bvh4.cpp",
"kernels/bvh/bvh_intersector_stream_bvh4.cpp",
"kernels/bvh/bvh_intersector_stream_filters.cpp",
"kernels/bvh/bvh_intersector_hybrid.cpp",
"kernels/bvh/bvh_intersector_stream.cpp",
]
os.chdir("../../thirdparty")
config_files = [
"kernels/config.h.in",
"kernels/rtcore_config.h.in",
]
license_file = "LICENSE.txt"
os.chdir(f"{os.path.dirname(__file__)}/../../thirdparty")
dir_name = "embree"
if os.path.exists(dir_name):
shutil.rmtree(dir_name)
# In case something went wrong and embree-tmp stayed on the system.
if os.path.exists("embree-tmp"):
shutil.rmtree("embree-tmp")
subprocess.run(["git", "clone", "https://github.com/embree/embree.git", "embree-tmp"])
os.chdir("embree-tmp")
subprocess.run(["git", "checkout", git_tag])
commit_hash = str(subprocess.check_output(["git", "rev-parse", "HEAD"], universal_newlines=True)).strip()
def on_rm_error(
function: Callable[..., Any], path: str, excinfo: Tuple[Type[Exception], Exception, TracebackType]
) -> None:
"""
Error handler for `shutil.rmtree()`.
If the error is due to read-only files,
it will change the file permissions and retry.
"""
os.chmod(path, stat.S_IWRITE)
os.unlink(path)
# 3.12 Python and beyond should replace `onerror` with `onexc`.
# We remove the .git directory because it contains
# a lot of read-only files that are problematic on Windows.
shutil.rmtree(".git", onerror=on_rm_error)
all_files = set(cpp_files)
for config_file in config_files:
all_files.add(config_file)
all_files.add(license_file)
dest_dir = os.path.join("..", dir_name)
for include_dir in include_dirs:
headers = glob.iglob(os.path.join(include_dir, "*.h"))
@ -105,87 +145,8 @@ with open(os.path.join(dest_dir, "kernels/hash.h"), "w", encoding="utf-8", newli
"""
)
with open(os.path.join(dest_dir, "kernels/config.h"), "w", encoding="utf-8", newline="\n") as config_file:
config_file.write(
"""// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
/* #undef EMBREE_RAY_MASK */
/* #undef EMBREE_STAT_COUNTERS */
/* #undef EMBREE_BACKFACE_CULLING */
/* #undef EMBREE_BACKFACE_CULLING_CURVES */
#define EMBREE_FILTER_FUNCTION
/* #undef EMBREE_IGNORE_INVALID_RAYS */
#define EMBREE_GEOMETRY_TRIANGLE
/* #undef EMBREE_GEOMETRY_QUAD */
/* #undef EMBREE_GEOMETRY_CURVE */
/* #undef EMBREE_GEOMETRY_SUBDIVISION */
/* #undef EMBREE_GEOMETRY_USER */
/* #undef EMBREE_GEOMETRY_INSTANCE */
/* #undef EMBREE_GEOMETRY_GRID */
/* #undef EMBREE_GEOMETRY_POINT */
#define EMBREE_RAY_PACKETS
/* #undef EMBREE_COMPACT_POLYS */
#define EMBREE_CURVE_SELF_INTERSECTION_AVOIDANCE_FACTOR 2.0
#define EMBREE_DISC_POINT_SELF_INTERSECTION_AVOIDANCE
#if defined(EMBREE_GEOMETRY_TRIANGLE)
#define IF_ENABLED_TRIS(x) x
#else
#define IF_ENABLED_TRIS(x)
#endif
#if defined(EMBREE_GEOMETRY_QUAD)
#define IF_ENABLED_QUADS(x) x
#else
#define IF_ENABLED_QUADS(x)
#endif
#if defined(EMBREE_GEOMETRY_CURVE) || defined(EMBREE_GEOMETRY_POINT)
#define IF_ENABLED_CURVES_OR_POINTS(x) x
#else
#define IF_ENABLED_CURVES_OR_POINTS(x)
#endif
#if defined(EMBREE_GEOMETRY_CURVE)
#define IF_ENABLED_CURVES(x) x
#else
#define IF_ENABLED_CURVES(x)
#endif
#if defined(EMBREE_GEOMETRY_POINT)
#define IF_ENABLED_POINTS(x) x
#else
#define IF_ENABLED_POINTS(x)
#endif
#if defined(EMBREE_GEOMETRY_SUBDIVISION)
#define IF_ENABLED_SUBDIV(x) x
#else
#define IF_ENABLED_SUBDIV(x)
#endif
#if defined(EMBREE_GEOMETRY_USER)
#define IF_ENABLED_USER(x) x
#else
#define IF_ENABLED_USER(x)
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
#define IF_ENABLED_INSTANCE(x) x
#else
#define IF_ENABLED_INSTANCE(x)
#endif
#if defined(EMBREE_GEOMETRY_GRID)
#define IF_ENABLED_GRIDS(x) x
#else
#define IF_ENABLED_GRIDS(x)
#endif
"""
)
for config_file in config_files:
os.rename(os.path.join(dest_dir, config_file), os.path.join(dest_dir, config_file[:-3]))
with open("CMakeLists.txt", "r", encoding="utf-8") as cmake_file:
cmake_content = cmake_file.read()
@ -193,70 +154,25 @@ with open("CMakeLists.txt", "r", encoding="utf-8") as cmake_file:
minor_version = int(re.compile(r"EMBREE_VERSION_MINOR\s(\d+)").findall(cmake_content)[0])
patch_version = int(re.compile(r"EMBREE_VERSION_PATCH\s(\d+)").findall(cmake_content)[0])
shutil.move(os.path.join(dest_dir, "kernels/rtcore_config.h"), os.path.join(dest_dir, ("include/embree4/")))
with open(
os.path.join(dest_dir, "include/embree3/rtcore_config.h"), "w", encoding="utf-8", newline="\n"
) as config_file:
config_file.write(
f"""// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#define RTC_VERSION_MAJOR {major_version}
#define RTC_VERSION_MINOR {minor_version}
#define RTC_VERSION_PATCH {patch_version}
#define RTC_VERSION {major_version}{minor_version:02d}{patch_version:02d}
#define RTC_VERSION_STRING "{major_version}.{minor_version}.{patch_version}"
#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
#define EMBREE_MIN_WIDTH 0
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
#if !defined(EMBREE_STATIC_LIB)
# define EMBREE_STATIC_LIB
#endif
/* #undef EMBREE_API_NAMESPACE*/
#if defined(EMBREE_API_NAMESPACE)
# define RTC_NAMESPACE
# define RTC_NAMESPACE_BEGIN namespace {{
# define RTC_NAMESPACE_END }}
# define RTC_NAMESPACE_USE using namespace;
# define RTC_API_EXTERN_C
# undef EMBREE_API_NAMESPACE
#else
# define RTC_NAMESPACE_BEGIN
# define RTC_NAMESPACE_END
# define RTC_NAMESPACE_USE
# if defined(__cplusplus)
# define RTC_API_EXTERN_C extern "C"
# else
# define RTC_API_EXTERN_C
# endif
#endif
#if defined(ISPC)
# define RTC_API_IMPORT extern "C" unmasked
# define RTC_API_EXPORT extern "C" unmasked
#elif defined(EMBREE_STATIC_LIB)
# define RTC_API_IMPORT RTC_API_EXTERN_C
# define RTC_API_EXPORT RTC_API_EXTERN_C
#elif defined(_WIN32)
# define RTC_API_IMPORT RTC_API_EXTERN_C __declspec(dllimport)
# define RTC_API_EXPORT RTC_API_EXTERN_C __declspec(dllexport)
#else
# define RTC_API_IMPORT RTC_API_EXTERN_C
# define RTC_API_EXPORT RTC_API_EXTERN_C __attribute__ ((visibility ("default")))
#endif
#if defined(RTC_EXPORT_API)
# define RTC_API RTC_API_EXPORT
#else
# define RTC_API RTC_API_IMPORT
#endif
"""
)
os.path.join(dest_dir, "include/embree4/rtcore_config.h"), "r+", encoding="utf-8", newline="\n"
) as rtcore_config:
lines = rtcore_config.readlines()
rtcore_config.seek(0)
for i, line in enumerate(lines):
if line.startswith("#define RTC_VERSION_MAJOR"):
lines[i : i + 5] = [
f"#define RTC_VERSION_MAJOR {major_version}\n",
f"#define RTC_VERSION_MINOR {minor_version}\n",
f"#define RTC_VERSION_PATCH {patch_version}\n",
f"#define RTC_VERSION {major_version}{minor_version:02d}{patch_version:02d}\n",
f'#define RTC_VERSION_STRING "{major_version}.{minor_version}.{patch_version}"\n',
]
break
rtcore_config.writelines(lines)
rtcore_config.truncate()
os.chdir("..")
shutil.rmtree("embree-tmp")
@ -264,4 +180,4 @@ shutil.rmtree("embree-tmp")
subprocess.run(["git", "restore", "embree/patches"])
for patch in os.listdir("embree/patches"):
subprocess.run(["git", "apply", "embree/patches/" + patch])
subprocess.run(["git", "apply", f"embree/patches/{patch}"])

View File

@ -69,11 +69,12 @@ void LightmapRaycasterEmbree::filter_function(const struct RTCFilterFunctionNArg
}
bool LightmapRaycasterEmbree::intersect(Ray &r_ray) {
RTCIntersectContext context;
rtcInitIntersectContext(&context);
rtcIntersect1(embree_scene, &context, (RTCRayHit *)&r_ray);
RTCRayQueryContext context;
rtcInitRayQueryContext(&context);
RTCIntersectArguments args;
rtcInitIntersectArguments(&args);
args.context = &context;
rtcIntersect1(embree_scene, (RTCRayHit *)&r_ray, &args);
return r_ray.geomID != RTC_INVALID_GEOMETRY_ID;
}

View File

@ -37,7 +37,7 @@
#include "core/object/object.h"
#include "scene/3d/lightmapper.h"
#include <embree3/rtcore.h>
#include <embree4/rtcore.h>
class LightmapRaycasterEmbree : public LightmapRaycaster {
GDCLASS(LightmapRaycasterEmbree, LightmapRaycaster);

View File

@ -488,11 +488,13 @@ void RaycastOcclusionCull::Scenario::update() {
}
void RaycastOcclusionCull::Scenario::_raycast(uint32_t p_idx, const RaycastThreadData *p_raycast_data) const {
RTCIntersectContext ctx;
rtcInitIntersectContext(&ctx);
ctx.flags = RTC_INTERSECT_CONTEXT_FLAG_COHERENT;
rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &ctx, &p_raycast_data->rays[p_idx]);
RTCRayQueryContext context;
rtcInitRayQueryContext(&context);
RTCIntersectArguments args;
rtcInitIntersectArguments(&args);
args.flags = RTC_RAY_QUERY_FLAG_COHERENT;
args.context = &context;
rtcIntersect16((const int *)&p_raycast_data->masks[p_idx * TILE_RAYS], ebr_scene[current_scene_idx], &p_raycast_data->rays[p_idx], &args);
}
void RaycastOcclusionCull::Scenario::raycast(CameraRayTile *r_rays, const uint32_t *p_valid_masks, uint32_t p_tile_count) const {

View File

@ -40,7 +40,7 @@
#include "scene/resources/mesh.h"
#include "servers/rendering/renderer_scene_occlusion_cull.h"
#include <embree3/rtcore.h>
#include <embree4/rtcore.h>
class RaycastOcclusionCull : public RendererSceneOcclusionCull {
typedef RTCRayHit16 CameraRayTile;

View File

@ -53,9 +53,12 @@ void StaticRaycasterEmbree::free() {
}
bool StaticRaycasterEmbree::intersect(Ray &r_ray) {
RTCIntersectContext context;
rtcInitIntersectContext(&context);
rtcIntersect1(embree_scene, &context, (RTCRayHit *)&r_ray);
RTCRayQueryContext context;
rtcInitRayQueryContext(&context);
RTCIntersectArguments args;
rtcInitIntersectArguments(&args);
args.context = &context;
rtcIntersect1(embree_scene, (RTCRayHit *)&r_ray, &args);
return r_ray.geomID != RTC_INVALID_GEOMETRY_ID;
}

View File

@ -35,7 +35,7 @@
#include "core/math/static_raycaster.h"
#include <embree3/rtcore.h>
#include <embree4/rtcore.h>
class StaticRaycasterEmbree : public StaticRaycaster {
GDCLASS(StaticRaycasterEmbree, StaticRaycaster);

View File

@ -172,13 +172,15 @@ Files extracted from upstream source:
## embree
- Upstream: https://github.com/embree/embree
- Version: 3.13.5 (698442324ccddd11725fb8875275dc1384f7fb40, 2022)
- Version: 4.3.1 (daa8de0e714e18ad5e5c9841b67c1950d9c91c51, 2024)
- License: Apache 2.0
Files extracted from upstream:
- All `.cpp` files listed in `modules/raycast/godot_update_embree.py`
- All header files in the directories listed in `modules/raycast/godot_update_embree.py`
- All config files listed in `modules/raycast/godot_update_embree.py`
- `LICENSE.txt`
The `modules/raycast/godot_update_embree.py` script can be used to pull the
relevant files from the latest Embree release and apply some automatic changes.

202
thirdparty/embree/LICENSE.txt vendored Normal file
View File

@ -0,0 +1,202 @@
Apache License
Version 2.0, January 2004
http://www.apache.org/licenses/
TERMS AND CONDITIONS FOR USE, REPRODUCTION, AND DISTRIBUTION
1. Definitions.
"License" shall mean the terms and conditions for use, reproduction,
and distribution as defined by Sections 1 through 9 of this document.
"Licensor" shall mean the copyright owner or entity authorized by
the copyright owner that is granting the License.
"Legal Entity" shall mean the union of the acting entity and all
other entities that control, are controlled by, or are under common
control with that entity. For the purposes of this definition,
"control" means (i) the power, direct or indirect, to cause the
direction or management of such entity, whether by contract or
otherwise, or (ii) ownership of fifty percent (50%) or more of the
outstanding shares, or (iii) beneficial ownership of such entity.
"You" (or "Your") shall mean an individual or Legal Entity
exercising permissions granted by this License.
"Source" form shall mean the preferred form for making modifications,
including but not limited to software source code, documentation
source, and configuration files.
"Object" form shall mean any form resulting from mechanical
transformation or translation of a Source form, including but
not limited to compiled object code, generated documentation,
and conversions to other media types.
"Work" shall mean the work of authorship, whether in Source or
Object form, made available under the License, as indicated by a
copyright notice that is included in or attached to the work
(an example is provided in the Appendix below).
"Derivative Works" shall mean any work, whether in Source or Object
form, that is based on (or derived from) the Work and for which the
editorial revisions, annotations, elaborations, or other modifications
represent, as a whole, an original work of authorship. For the purposes
of this License, Derivative Works shall not include works that remain
separable from, or merely link (or bind by name) to the interfaces of,
the Work and Derivative Works thereof.
"Contribution" shall mean any work of authorship, including
the original version of the Work and any modifications or additions
to that Work or Derivative Works thereof, that is intentionally
submitted to Licensor for inclusion in the Work by the copyright owner
or by an individual or Legal Entity authorized to submit on behalf of
the copyright owner. For the purposes of this definition, "submitted"
means any form of electronic, verbal, or written communication sent
to the Licensor or its representatives, including but not limited to
communication on electronic mailing lists, source code control systems,
and issue tracking systems that are managed by, or on behalf of, the
Licensor for the purpose of discussing and improving the Work, but
excluding communication that is conspicuously marked or otherwise
designated in writing by the copyright owner as "Not a Contribution."
"Contributor" shall mean Licensor and any individual or Legal Entity
on behalf of whom a Contribution has been received by Licensor and
subsequently incorporated within the Work.
2. Grant of Copyright License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
copyright license to reproduce, prepare Derivative Works of,
publicly display, publicly perform, sublicense, and distribute the
Work and such Derivative Works in Source or Object form.
3. Grant of Patent License. Subject to the terms and conditions of
this License, each Contributor hereby grants to You a perpetual,
worldwide, non-exclusive, no-charge, royalty-free, irrevocable
(except as stated in this section) patent license to make, have made,
use, offer to sell, sell, import, and otherwise transfer the Work,
where such license applies only to those patent claims licensable
by such Contributor that are necessarily infringed by their
Contribution(s) alone or by combination of their Contribution(s)
with the Work to which such Contribution(s) was submitted. If You
institute patent litigation against any entity (including a
cross-claim or counterclaim in a lawsuit) alleging that the Work
or a Contribution incorporated within the Work constitutes direct
or contributory patent infringement, then any patent licenses
granted to You under this License for that Work shall terminate
as of the date such litigation is filed.
4. Redistribution. You may reproduce and distribute copies of the
Work or Derivative Works thereof in any medium, with or without
modifications, and in Source or Object form, provided that You
meet the following conditions:
(a) You must give any other recipients of the Work or
Derivative Works a copy of this License; and
(b) You must cause any modified files to carry prominent notices
stating that You changed the files; and
(c) You must retain, in the Source form of any Derivative Works
that You distribute, all copyright, patent, trademark, and
attribution notices from the Source form of the Work,
excluding those notices that do not pertain to any part of
the Derivative Works; and
(d) If the Work includes a "NOTICE" text file as part of its
distribution, then any Derivative Works that You distribute must
include a readable copy of the attribution notices contained
within such NOTICE file, excluding those notices that do not
pertain to any part of the Derivative Works, in at least one
of the following places: within a NOTICE text file distributed
as part of the Derivative Works; within the Source form or
documentation, if provided along with the Derivative Works; or,
within a display generated by the Derivative Works, if and
wherever such third-party notices normally appear. The contents
of the NOTICE file are for informational purposes only and
do not modify the License. You may add Your own attribution
notices within Derivative Works that You distribute, alongside
or as an addendum to the NOTICE text from the Work, provided
that such additional attribution notices cannot be construed
as modifying the License.
You may add Your own copyright statement to Your modifications and
may provide additional or different license terms and conditions
for use, reproduction, or distribution of Your modifications, or
for any such Derivative Works as a whole, provided Your use,
reproduction, and distribution of the Work otherwise complies with
the conditions stated in this License.
5. Submission of Contributions. Unless You explicitly state otherwise,
any Contribution intentionally submitted for inclusion in the Work
by You to the Licensor shall be under the terms and conditions of
this License, without any additional terms or conditions.
Notwithstanding the above, nothing herein shall supersede or modify
the terms of any separate license agreement you may have executed
with Licensor regarding such Contributions.
6. Trademarks. This License does not grant permission to use the trade
names, trademarks, service marks, or product names of the Licensor,
except as required for reasonable and customary use in describing the
origin of the Work and reproducing the content of the NOTICE file.
7. Disclaimer of Warranty. Unless required by applicable law or
agreed to in writing, Licensor provides the Work (and each
Contributor provides its Contributions) on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or
implied, including, without limitation, any warranties or conditions
of TITLE, NON-INFRINGEMENT, MERCHANTABILITY, or FITNESS FOR A
PARTICULAR PURPOSE. You are solely responsible for determining the
appropriateness of using or redistributing the Work and assume any
risks associated with Your exercise of permissions under this License.
8. Limitation of Liability. In no event and under no legal theory,
whether in tort (including negligence), contract, or otherwise,
unless required by applicable law (such as deliberate and grossly
negligent acts) or agreed to in writing, shall any Contributor be
liable to You for damages, including any direct, indirect, special,
incidental, or consequential damages of any character arising as a
result of this License or out of the use or inability to use the
Work (including but not limited to damages for loss of goodwill,
work stoppage, computer failure or malfunction, or any and all
other commercial damages or losses), even if such Contributor
has been advised of the possibility of such damages.
9. Accepting Warranty or Additional Liability. While redistributing
the Work or Derivative Works thereof, You may choose to offer,
and charge a fee for, acceptance of support, warranty, indemnity,
or other liability obligations and/or rights consistent with this
License. However, in accepting such obligations, You may act only
on Your own behalf and on Your sole responsibility, not on behalf
of any other Contributor, and only if You agree to indemnify,
defend, and hold each Contributor harmless for any liability
incurred by, or claims asserted against, such Contributor by reason
of your accepting any such warranty or additional liability.
END OF TERMS AND CONDITIONS
APPENDIX: How to apply the Apache License to your work.
To apply the Apache License to your work, attach the following
boilerplate notice, with the fields enclosed by brackets "[]"
replaced with your own identifying information. (Don't include
the brackets!) The text should be enclosed in the appropriate
comment syntax for the file format. We also recommend that a
file or class name and description of purpose be included on the
same "printed page" as the copyright notice for easier
identification within third-party archives.
Copyright [yyyy] [name of copyright owner]
Licensed under the Apache License, Version 2.0 (the "License");
you may not use this file except in compliance with the License.
You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing, software
distributed under the License is distributed on an "AS IS" BASIS,
WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
See the License for the specific language governing permissions and
limitations under the License.

View File

@ -12,7 +12,8 @@ namespace embree
template<typename Index, class UnaryPredicate>
__forceinline bool parallel_any_of (Index first, Index last, UnaryPredicate pred)
{
bool ret = false;
std::atomic_bool ret;
ret = false;
#if defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002

View File

@ -5,7 +5,7 @@
#include "../tasking/taskscheduler.h"
#include "../sys/array.h"
#include "../math/math.h"
#include "../math/emath.h"
#include "../math/range.h"
namespace embree
@ -14,17 +14,17 @@ namespace embree
template<typename Index, typename Func>
__forceinline void parallel_for( const Index N, const Func& func)
{
#if defined(TASKING_INTERNAL)
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
if (N) {
TaskScheduler::TaskGroupContext context;
TaskScheduler::spawn(Index(0),N,Index(1),[&] (const range<Index>& r) {
assert(r.size() == 1);
func(r.begin());
});
if (!TaskScheduler::wait())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
},&context);
TaskScheduler::wait();
if (context.cancellingException != nullptr) {
std::rethrow_exception(context.cancellingException);
}
}
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
@ -33,19 +33,13 @@ namespace embree
func(i);
},context);
if (context.is_group_execution_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
});
if (tbb::task::self().is_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
throw std::runtime_error("task cancelled");
#endif
#elif defined(TASKING_PPL)
@ -62,13 +56,13 @@ namespace embree
__forceinline void parallel_for( const Index first, const Index last, const Index minStepSize, const Func& func)
{
assert(first <= last);
#if defined(TASKING_INTERNAL)
TaskScheduler::spawn(first,last,minStepSize,func);
if (!TaskScheduler::wait())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
TaskScheduler::TaskGroupContext context;
TaskScheduler::spawn(first,last,minStepSize,func,&context);
TaskScheduler::wait();
if (context.cancellingException != nullptr) {
std::rethrow_exception(context.cancellingException);
}
#elif defined(TASKING_TBB)
#if TBB_INTERFACE_VERSION >= 12002
@ -77,19 +71,13 @@ namespace embree
func(range<Index>(r.begin(),r.end()));
},context);
if (context.is_group_execution_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(tbb::blocked_range<Index>(first,last,minStepSize),[&](const tbb::blocked_range<Index>& r) {
func(range<Index>(r.begin(),r.end()));
});
if (tbb::task::self().is_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
throw std::runtime_error("task cancelled");
#endif
#elif defined(TASKING_PPL)
@ -121,19 +109,13 @@ namespace embree
func(i);
},tbb::simple_partitioner(),context);
if (context.is_group_execution_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},tbb::simple_partitioner());
if (tbb::task::self().is_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
throw std::runtime_error("task cancelled");
#endif
}
@ -148,19 +130,13 @@ namespace embree
func(i);
},ap,context);
if (context.is_group_execution_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
throw std::runtime_error("task cancelled");
#else
tbb::parallel_for(Index(0),N,Index(1),[&](Index i) {
func(i);
},ap);
if (tbb::task::self().is_cancelled())
// -- GODOT start --
// throw std::runtime_error("task cancelled");
abort();
// -- GODOT end --
throw std::runtime_error("task cancelled");
#endif
}

View File

@ -175,8 +175,8 @@ namespace embree
/* calculate all left and right ranges that are on the wrong global side */
size_t numMisplacedRangesLeft = 0;
size_t numMisplacedRangesRight = 0;
size_t numMisplacedItemsLeft = 0;
size_t numMisplacedItemsRight = 0;
size_t numMisplacedItemsLeft MAYBE_UNUSED = 0;
size_t numMisplacedItemsRight MAYBE_UNUSED = 0;
for (size_t i=0; i<numTasks; i++)
{

View File

@ -43,7 +43,7 @@ namespace embree
template<typename Index, typename Value, typename Func, typename Reduction>
__forceinline Value parallel_reduce( const Index first, const Index last, const Index minStepSize, const Value& identity, const Func& func, const Reduction& reduction )
{
#if defined(TASKING_INTERNAL)
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
/* fast path for small number of iterations */
Index taskCount = (last-first+minStepSize-1)/minStepSize;

View File

@ -6,7 +6,7 @@
#include "../sys/platform.h"
#include "../sys/ref.h"
#include "../sys/filename.h"
#include "../sys/string.h"
#include "../sys/estring.h"
#include <vector>
#include <iostream>
@ -122,17 +122,16 @@ namespace embree
class FileStream : public Stream<int>
{
public:
FileStream (FILE* file, const std::string& name = "file")
: file(file), lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(name))) {}
FileStream (const FileName& fileName)
: lineNumber(1), colNumber(0), charNumber(0), name(std::shared_ptr<std::string>(new std::string(fileName.str())))
{
file = fopen(fileName.c_str(),"r");
if (file == nullptr) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
if (ifs) ifs.close();
ifs.open(fileName.str());
if (!ifs.is_open()) THROW_RUNTIME_ERROR("cannot open file " + fileName.str());
}
~FileStream() {
if (ifs) ifs.close();
}
~FileStream() { if (file) fclose(file); }
public:
ParseLocation location() {
@ -140,14 +139,15 @@ namespace embree
}
int next() {
int c = fgetc(file);
int c = ifs.get();
if (c == '\n') { lineNumber++; colNumber = 0; } else if (c != '\r') colNumber++;
charNumber++;
return c;
}
private:
FILE* file;
std::ifstream ifs;
ssize_t lineNumber; /// the line number the token is from
ssize_t colNumber; /// the character number in the current line
ssize_t charNumber; /// the character in the file

View File

@ -41,7 +41,9 @@ namespace embree
int c = cin->get();
// -- GODOT start --
// if (!isValidChar(c)) throw std::runtime_error("invalid character "+std::string(1,c)+" in input");
if (!isValidChar(c)) abort();
if (!isValidChar(c)) {
abort();
}
// -- GODOT end --
str.push_back((char)c);
}

View File

@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
#include "tokenstream.h"
#include "../math/math.h"
#include "../math/emath.h"
namespace embree
{

View File

@ -56,6 +56,11 @@ namespace embree
return BBox(min(a.lower, b.lower), max(a.upper, b.upper));
}
/*! intersects two boxes */
__forceinline static const BBox intersect (const BBox& a, const BBox& b) {
return BBox(max(a.lower, b.lower), min(a.upper, b.upper));
}
/*! enlarge box by some scaling factor */
__forceinline BBox enlarge_by(const float a) const {
return BBox(lower - T(a)*abs(lower), upper + T(a)*abs(upper));

View File

@ -3,7 +3,7 @@
#pragma once
#include "math.h"
#include "emath.h"
namespace embree
{

View File

@ -3,7 +3,7 @@
#pragma once
#include "math.h"
#include "emath.h"
namespace embree
{

View File

@ -3,6 +3,10 @@
#pragma once
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "color_sycl.h"
#else
#include "constants.h"
#include "col3.h"
#include "col4.h"
@ -64,6 +68,10 @@ namespace embree
d.b = (unsigned char)(s[2]);
d.a = (unsigned char)(s[3]);
}
__forceinline void set(float &f) const
{
f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
@ -256,3 +264,5 @@ namespace embree
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
}
#endif

View File

@ -0,0 +1,219 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "constants.h"
#include "col3.h"
#include "col4.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE RGBA Color Class
////////////////////////////////////////////////////////////////////////////////
struct Color4
{
struct { float r,g,b,a; };
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color4 () {}
//__forceinline Color4 ( const __m128 a ) : m128(a) {}
__forceinline explicit Color4 (const float v) : r(v), g(v), b(v), a(v) {}
__forceinline Color4 (const float r, const float g, const float b, const float a) : r(r), g(g), b(b), a(a) {}
__forceinline explicit Color4 ( const Col3uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(1.0f) {}
__forceinline explicit Color4 ( const Col3f& other ) : r(other.r), g(other.g), b(other.b), a(1.0f) {}
__forceinline explicit Color4 ( const Col4uc& other ) : r(other.r/255.0f), g(other.g/255.0f), b(other.b/255.0f), a(other.a/255.0f) {}
__forceinline explicit Color4 ( const Col4f& other ) : r(other.r), g(other.g), b(other.b), a(other.a) {}
//__forceinline Color4 ( const Color4& other ) : m128(other.m128) {}
//__forceinline Color4& operator=( const Color4& other ) { m128 = other.m128; return *this; }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = a; }
__forceinline void set(Col3uc& d) const
{
d.r = (unsigned char)(clamp(r)*255.0f);
d.g = (unsigned char)(clamp(g)*255.0f);
d.b = (unsigned char)(clamp(b)*255.0f);
}
__forceinline void set(Col4uc& d) const
{
d.r = (unsigned char)(clamp(r)*255.0f);
d.g = (unsigned char)(clamp(g)*255.0f);
d.b = (unsigned char)(clamp(b)*255.0f);
d.a = (unsigned char)(clamp(a)*255.0f);
}
__forceinline void set(float &f) const
{
f = 0.2126f*r+0.7125f*g+0.0722f*b; // sRGB luminance.
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color4( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f), a(0.0f) {}
__forceinline Color4( OneTy ) : r(1.0f), g(1.0f), b(1.0f), a(1.0f) {}
//__forceinline Color4( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
//__forceinline Color4( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// SSE RGB Color Class
////////////////////////////////////////////////////////////////////////////////
struct Color
{
struct { float r,g,b; };
////////////////////////////////////////////////////////////////////////////////
/// Construction
////////////////////////////////////////////////////////////////////////////////
__forceinline Color () {}
//__forceinline Color ( const __m128 a ) : m128(a) {}
__forceinline explicit Color (const float v) : r(v), g(v), b(v) {}
__forceinline Color (const float r, const float g, const float b) : r(r), g(g), b(b) {}
//__forceinline Color ( const Color& other ) : m128(other.m128) {}
//__forceinline Color& operator=( const Color& other ) { m128 = other.m128; return *this; }
//__forceinline Color ( const Color4& other ) : m128(other.m128) {}
//__forceinline Color& operator=( const Color4& other ) { m128 = other.m128; return *this; }
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Set
////////////////////////////////////////////////////////////////////////////////
__forceinline void set(Col3f& d) const { d.r = r; d.g = g; d.b = b; }
__forceinline void set(Col4f& d) const { d.r = r; d.g = g; d.b = b; d.a = 1.0f; }
#if 0
__forceinline void set(Col3uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
}
__forceinline void set(Col4uc& d) const
{
vfloat4 s = clamp(vfloat4(m128))*255.0f;
d.r = (unsigned char)(s[0]);
d.g = (unsigned char)(s[1]);
d.b = (unsigned char)(s[2]);
d.a = 255;
}
#endif
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Color( ZeroTy ) : r(0.0f), g(0.0f), b(0.0f) {}
__forceinline Color( OneTy ) : r(1.0f), g(1.0f), b(1.0f) {}
//__forceinline Color( PosInfTy ) : m128(_mm_set1_ps(pos_inf)) {}
//__forceinline Color( NegInfTy ) : m128(_mm_set1_ps(neg_inf)) {}
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator +( const Color& a ) { return a; }
__forceinline const Color operator -( const Color& a ) { return Color(-a.r, -a.g, -a.b); }
__forceinline const Color abs ( const Color& a ) { return Color(abs(a.r), abs(a.g), abs(a.b)); }
__forceinline const Color rcp ( const Color& a ) { return Color(1.0f/a.r, 1.0f/a.g, 1.0f/a.b); }
__forceinline const Color rsqrt( const Color& a ) { return Color(1.0f/sqrt(a.r), 1.0f/sqrt(a.g), 1.0f/sqrt(a.b)); }
__forceinline const Color sqrt ( const Color& a ) { return Color(sqrt(a.r), sqrt(a.g), sqrt(a.b)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator +( const Color& a, const Color& b ) { return Color(a.r+b.r, a.g+b.g, a.b+b.b); }
__forceinline const Color operator -( const Color& a, const Color& b ) { return Color(a.r-b.r, a.g-b.g, a.b-b.b); }
__forceinline const Color operator *( const Color& a, const Color& b ) { return Color(a.r*b.r, a.g*b.g, a.b*b.b); }
__forceinline const Color operator *( const Color& a, const float b ) { return a * Color(b); }
__forceinline const Color operator *( const float a, const Color& b ) { return Color(a) * b; }
__forceinline const Color operator /( const Color& a, const Color& b ) { return a * rcp(b); }
__forceinline const Color operator /( const Color& a, const float b ) { return a * rcp(b); }
__forceinline const Color min( const Color& a, const Color& b ) { return Color(min(a.r,b.r), min(a.g,b.g), min(a.b,b.b)); }
__forceinline const Color max( const Color& a, const Color& b ) { return Color(max(a.r,b.r), max(a.g,b.g), max(a.b,b.b)); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color operator+=(Color& a, const Color& b) { return a = a + b; }
__forceinline const Color operator-=(Color& a, const Color& b) { return a = a - b; }
__forceinline const Color operator*=(Color& a, const Color& b) { return a = a * b; }
__forceinline const Color operator/=(Color& a, const Color& b) { return a = a / b; }
__forceinline const Color operator*=(Color& a, const float b ) { return a = a * b; }
__forceinline const Color operator/=(Color& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Color& v) { return v.r+v.g+v.b; }
__forceinline float reduce_mul(const Color& v) { return v.r*v.g*v.b; }
__forceinline float reduce_min(const Color& v) { return min(v.r,v.g,v.b); }
__forceinline float reduce_max(const Color& v) { return max(v.r,v.g,v.b); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Color& a, const Color& b ) { return a.r == b.r && a.g == b.g && a.b == b.b; }
__forceinline bool operator !=( const Color& a, const Color& b ) { return a.r != b.r || a.g != b.g || a.b != b.b; }
__forceinline bool operator < ( const Color& a, const Color& b ) {
if (a.r != b.r) return a.r < b.r;
if (a.g != b.g) return a.g < b.g;
if (a.b != b.b) return a.b < b.b;
return false;
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline const Color select( bool s, const Color& t, const Color& f ) {
return s ? t : f;
}
////////////////////////////////////////////////////////////////////////////////
/// Special Operators
////////////////////////////////////////////////////////////////////////////////
/*! computes luminance of a color */
__forceinline float luminance (const Color& a) { return madd(0.212671f,a.r,madd(0.715160f,a.g,0.072169f*a.b)); }
/*! output operator */
inline std::ostream& operator<<(std::ostream& cout, const Color& a) {
return cout << "(" << a.r << ", " << a.g << ", " << a.b << ")";
}
}

View File

@ -8,6 +8,10 @@
#include "constants.h"
#include <cmath>
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "math_sycl.h"
#else
#if defined(__ARM_NEON)
#include "../simd/arm/emulation.h"
#else
@ -44,6 +48,9 @@ namespace embree
__forceinline int toInt (const float& a) { return int(a); }
__forceinline float toFloat(const int& a) { return float(a); }
__forceinline int asInt (const float& a) { return *((int*)&a); }
__forceinline float asFloat(const int& a) { return *((float*)&a); }
#if defined(__WIN32__)
__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
#endif
@ -351,7 +358,11 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur
__forceinline int select(bool s, int t, int f) { return s ? t : f; }
__forceinline float select(bool s, float t, float f) { return s ? t : f; }
__forceinline bool all(bool s) { return s; }
__forceinline bool none(bool s) { return !s; }
__forceinline bool all (bool s) { return s; }
__forceinline bool any (bool s) { return s; }
__forceinline unsigned movemask (bool s) { return (unsigned)s; }
__forceinline float lerp(const float v0, const float v1, const float t) {
return madd(1.0f-t,v0,t*v1);
@ -453,3 +464,5 @@ __forceinline float nmsub ( const float a, const float b, const float c) { retur
return x | (y << 1) | (z << 2);
}
}
#endif

View File

@ -179,6 +179,48 @@ namespace embree
bounds1 = b1;
}
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
__forceinline LBBox(const BBox1f& time_range_in, const LBBox<T> lbounds, const BBox1f& target_time_range)
{
const BBox3f bounds0 = lbounds.bounds0;
const BBox3f bounds1 = lbounds.bounds1;
/* normalize global target_time_range to local time_range_in */
const BBox1f time_range((target_time_range.lower-time_range_in.lower)/time_range_in.size(),
(target_time_range.upper-time_range_in.lower)/time_range_in.size());
const BBox1f clipped_time_range(max(0.0f,time_range.lower), min(1.0f,time_range.upper));
/* compute bounds at begin and end of clipped time range */
BBox<T> b0 = lerp(bounds0,bounds1,clipped_time_range.lower);
BBox<T> b1 = lerp(bounds0,bounds1,clipped_time_range.upper);
/* make sure that b0 is properly bounded at time_range_in.lower */
{
const BBox<T> bt = lerp(b0, b1, (0.0f - time_range.lower) / time_range.size());
const T dlower = min(bounds0.lower-bt.lower, T(zero));
const T dupper = max(bounds0.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
/* make sure that b1 is properly bounded at time_range_in.upper */
{
const BBox<T> bt = lerp(b0, b1, (1.0f - time_range.lower) / time_range.size());
const T dlower = min(bounds1.lower-bt.lower, T(zero));
const T dupper = max(bounds1.upper-bt.upper, T(zero));
b0.lower += dlower; b1.lower += dlower;
b0.upper += dupper; b1.upper += dupper;
}
this->bounds0 = b0;
this->bounds1 = b1;
}
/*! calculates the linear bounds for target_time_range of primitive with it's time_range_in and bounds */
__forceinline LBBox(const BBox1f& time_range_in, const BBox<T>& bounds0, const BBox<T>& bounds1, const BBox1f& target_time_range)
: LBBox(time_range_in,LBBox(bounds0,bounds1),target_time_range) {}
public:
__forceinline bool empty() const {

View File

@ -18,6 +18,7 @@ namespace embree
/*! default matrix constructor */
__forceinline LinearSpace2 ( ) {}
__forceinline LinearSpace2 ( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; }
__forceinline LinearSpace2& operator=( const LinearSpace2& other ) { vx = other.vx; vy = other.vy; return *this; }

View File

@ -19,6 +19,7 @@ namespace embree
/*! default matrix constructor */
__forceinline LinearSpace3 ( ) {}
__forceinline LinearSpace3 ( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; }
__forceinline LinearSpace3& operator=( const LinearSpace3& other ) { vx = other.vx; vy = other.vy; vz = other.vz; return *this; }
@ -90,11 +91,14 @@ namespace embree
Vector vx,vy,vz;
};
#if !defined(__SYCL_DEVICE_ONLY__)
/*! compute transposed matrix */
template<> __forceinline const LinearSpace3<Vec3fa> LinearSpace3<Vec3fa>::transposed() const {
vfloat4 rx,ry,rz; transpose((vfloat4&)vx,(vfloat4&)vy,(vfloat4&)vz,vfloat4(zero),rx,ry,rz);
return LinearSpace3<Vec3fa>(Vec3fa(rx),Vec3fa(ry),Vec3fa(rz));
}
#endif
template<typename T>
__forceinline const LinearSpace3<T> transposed(const LinearSpace3<T>& xfm) {

View File

@ -0,0 +1,279 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/platform.h"
#include "../sys/intrinsics.h"
#include "constants.h"
#include <cmath>
namespace embree
{
__forceinline bool isvalid ( const float& v ) {
return (v > -FLT_LARGE) & (v < +FLT_LARGE);
}
__forceinline int cast_f2i(float f) {
return __builtin_bit_cast(int,f);
}
__forceinline float cast_i2f(int i) {
return __builtin_bit_cast(float,i);
}
__forceinline int toInt (const float& a) { return int(a); }
__forceinline float toFloat(const int& a) { return float(a); }
__forceinline float asFloat(const int a) { return __builtin_bit_cast(float,a); }
__forceinline int asInt (const float a) { return __builtin_bit_cast(int,a); }
//__forceinline bool finite ( const float x ) { return _finite(x) != 0; }
__forceinline float sign ( const float x ) { return x<0?-1.0f:1.0f; }
__forceinline float sqr ( const float x ) { return x*x; }
__forceinline float rcp ( const float x ) {
return sycl::native::recip(x);
}
__forceinline float signmsk(const float a) { return asFloat(asInt(a) & 0x80000000); }
//__forceinline float signmsk ( const float x ) {
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(0x80000000))));
//}
//__forceinline float xorf( const float x, const float y ) {
// return _mm_cvtss_f32(_mm_xor_ps(_mm_set_ss(x),_mm_set_ss(y)));
//}
//__forceinline float andf( const float x, const unsigned y ) {
// return _mm_cvtss_f32(_mm_and_ps(_mm_set_ss(x),_mm_castsi128_ps(_mm_set1_epi32(y))));
//}
__forceinline float rsqrt( const float x ) {
return sycl::rsqrt(x);
}
//__forceinline float nextafter(float x, float y) { if ((x<y) == (x>0)) return x*(1.1f+float(ulp)); else return x*(0.9f-float(ulp)); }
//__forceinline double nextafter(double x, double y) { return _nextafter(x, y); }
//__forceinline int roundf(float f) { return (int)(f + 0.5f); }
__forceinline float abs ( const float x ) { return sycl::fabs(x); }
__forceinline float acos ( const float x ) { return sycl::acos(x); }
__forceinline float asin ( const float x ) { return sycl::asin(x); }
__forceinline float atan ( const float x ) { return sycl::atan(x); }
__forceinline float atan2( const float y, const float x ) { return sycl::atan2(y, x); }
__forceinline float cos ( const float x ) { return sycl::cos(x); }
__forceinline float cosh ( const float x ) { return sycl::cosh(x); }
__forceinline float exp ( const float x ) { return sycl::exp(x); }
__forceinline float fmod ( const float x, const float y ) { return sycl::fmod(x, y); }
__forceinline float log ( const float x ) { return sycl::log(x); }
__forceinline float log10( const float x ) { return sycl::log10(x); }
__forceinline float pow ( const float x, const float y ) { return sycl::pow(x, y); }
__forceinline float sin ( const float x ) { return sycl::sin(x); }
__forceinline float sinh ( const float x ) { return sycl::sinh(x); }
__forceinline float sqrt ( const float x ) { return sycl::sqrt(x); }
__forceinline float tan ( const float x ) { return sycl::tan(x); }
__forceinline float tanh ( const float x ) { return sycl::tanh(x); }
__forceinline float floor( const float x ) { return sycl::floor(x); }
__forceinline float ceil ( const float x ) { return sycl::ceil(x); }
__forceinline float frac ( const float x ) { return x-floor(x); }
//__forceinline double abs ( const double x ) { return ::fabs(x); }
//__forceinline double sign ( const double x ) { return x<0?-1.0:1.0; }
//__forceinline double acos ( const double x ) { return ::acos (x); }
//__forceinline double asin ( const double x ) { return ::asin (x); }
//__forceinline double atan ( const double x ) { return ::atan (x); }
//__forceinline double atan2( const double y, const double x ) { return ::atan2(y, x); }
//__forceinline double cos ( const double x ) { return ::cos (x); }
//__forceinline double cosh ( const double x ) { return ::cosh (x); }
//__forceinline double exp ( const double x ) { return ::exp (x); }
//__forceinline double fmod ( const double x, const double y ) { return ::fmod (x, y); }
//__forceinline double log ( const double x ) { return ::log (x); }
//__forceinline double log10( const double x ) { return ::log10(x); }
//__forceinline double pow ( const double x, const double y ) { return ::pow (x, y); }
//__forceinline double rcp ( const double x ) { return 1.0/x; }
//__forceinline double rsqrt( const double x ) { return 1.0/::sqrt(x); }
//__forceinline double sin ( const double x ) { return ::sin (x); }
//__forceinline double sinh ( const double x ) { return ::sinh (x); }
//__forceinline double sqr ( const double x ) { return x*x; }
//__forceinline double sqrt ( const double x ) { return ::sqrt (x); }
//__forceinline double tan ( const double x ) { return ::tan (x); }
//__forceinline double tanh ( const double x ) { return ::tanh (x); }
//__forceinline double floor( const double x ) { return ::floor (x); }
//__forceinline double ceil ( const double x ) { return ::ceil (x); }
/*
#if defined(__SSE4_1__)
__forceinline float mini(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_min_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
#if defined(__SSE4_1__)
__forceinline float maxi(float a, float b) {
const __m128i ai = _mm_castps_si128(_mm_set_ss(a));
const __m128i bi = _mm_castps_si128(_mm_set_ss(b));
const __m128i ci = _mm_max_epi32(ai,bi);
return _mm_cvtss_f32(_mm_castsi128_ps(ci));
}
#endif
*/
template<typename T>
__forceinline T twice(const T& a) { return a+a; }
__forceinline int min(int a, int b) { return sycl::min(a,b); }
__forceinline unsigned min(unsigned a, unsigned b) { return sycl::min(a,b); }
__forceinline int64_t min(int64_t a, int64_t b) { return sycl::min(a,b); }
__forceinline float min(float a, float b) { return sycl::fmin(a,b); }
__forceinline double min(double a, double b) { return sycl::fmin(a,b); }
#if defined(__X86_64__)
__forceinline size_t min(size_t a, size_t b) { return sycl::min(a,b); }
#endif
template<typename T> __forceinline T min(const T& a, const T& b, const T& c) { return min(min(a,b),c); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d) { return min(min(a,b),min(c,d)); }
template<typename T> __forceinline T min(const T& a, const T& b, const T& c, const T& d, const T& e) { return min(min(min(a,b),min(c,d)),e); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c) { return mini(mini(a,b),c); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d) { return mini(mini(a,b),mini(c,d)); }
// template<typename T> __forceinline T mini(const T& a, const T& b, const T& c, const T& d, const T& e) { return mini(mini(mini(a,b),mini(c,d)),e); }
__forceinline int max(int a, int b) { return sycl::max(a,b); }
__forceinline unsigned max(unsigned a, unsigned b) { return sycl::max(a,b); }
__forceinline int64_t max(int64_t a, int64_t b) { return sycl::max(a,b); }
__forceinline float max(float a, float b) { return sycl::fmax(a,b); }
__forceinline double max(double a, double b) { return sycl::fmax(a,b); }
#if defined(__X86_64__)
__forceinline size_t max(size_t a, size_t b) { return sycl::max(a,b); }
#endif
template<typename T> __forceinline T max(const T& a, const T& b, const T& c) { return max(max(a,b),c); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d) { return max(max(a,b),max(c,d)); }
template<typename T> __forceinline T max(const T& a, const T& b, const T& c, const T& d, const T& e) { return max(max(max(a,b),max(c,d)),e); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c) { return maxi(maxi(a,b),c); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d) { return maxi(maxi(a,b),maxi(c,d)); }
// template<typename T> __forceinline T maxi(const T& a, const T& b, const T& c, const T& d, const T& e) { return maxi(maxi(maxi(a,b),maxi(c,d)),e); }
template<typename T> __forceinline T clamp(const T& x, const T& lower = T(zero), const T& upper = T(one)) { return max(min(x,upper),lower); }
template<typename T> __forceinline T clampz(const T& x, const T& upper) { return max(T(zero), min(x,upper)); }
template<typename T> __forceinline T deg2rad ( const T& x ) { return x * T(1.74532925199432957692e-2f); }
template<typename T> __forceinline T rad2deg ( const T& x ) { return x * T(5.72957795130823208768e1f); }
template<typename T> __forceinline T sin2cos ( const T& x ) { return sqrt(max(T(zero),T(one)-x*x)); }
template<typename T> __forceinline T cos2sin ( const T& x ) { return sin2cos(x); }
__forceinline float madd ( const float a, const float b, const float c) { return +sycl::fma(+a,b,+c); }
__forceinline float msub ( const float a, const float b, const float c) { return +sycl::fma(+a,b,-c); }
__forceinline float nmadd ( const float a, const float b, const float c) { return +sycl::fma(-a,b,+c); }
__forceinline float nmsub ( const float a, const float b, const float c) { return -sycl::fma(+a,b,+c); }
/*! random functions */
/*
template<typename T> T random() { return T(0); }
template<> __forceinline int random() { return int(rand()); }
template<> __forceinline uint32_t random() { return uint32_t(rand()) ^ (uint32_t(rand()) << 16); }
template<> __forceinline float random() { return rand()/float(RAND_MAX); }
template<> __forceinline double random() { return rand()/double(RAND_MAX); }
*/
/*! selects */
__forceinline bool select(bool s, bool t , bool f) { return s ? t : f; }
__forceinline int select(bool s, int t, int f) { return s ? t : f; }
__forceinline float select(bool s, float t, float f) { return s ? t : f; }
__forceinline bool none(bool s) { return !s; }
__forceinline bool all (bool s) { return s; }
__forceinline bool any (bool s) { return s; }
__forceinline unsigned movemask (bool s) { return (unsigned)s; }
__forceinline float lerp(const float v0, const float v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
template<typename T>
__forceinline T lerp2(const float x0, const float x1, const float x2, const float x3, const T& u, const T& v) {
return madd((1.0f-u),madd((1.0f-v),T(x0),v*T(x2)),u*madd((1.0f-v),T(x1),v*T(x3)));
}
/*! exchange */
template<typename T> __forceinline void xchg ( T& a, T& b ) { const T tmp = a; a = b; b = tmp; }
/* load/store */
template<typename Ty> struct mem;
template<> struct mem<float> {
static __forceinline float load (bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline float loadu(bool mask, const void* ptr) { return mask ? *(float*)ptr : 0.0f; }
static __forceinline void store (bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
static __forceinline void storeu(bool mask, void* ptr, const float v) { if (mask) *(float*)ptr = v; }
};
/*! bit reverse operation */
template<class T>
__forceinline T bitReverse(const T& vin)
{
T v = vin;
v = ((v >> 1) & 0x55555555) | ((v & 0x55555555) << 1);
v = ((v >> 2) & 0x33333333) | ((v & 0x33333333) << 2);
v = ((v >> 4) & 0x0F0F0F0F) | ((v & 0x0F0F0F0F) << 4);
v = ((v >> 8) & 0x00FF00FF) | ((v & 0x00FF00FF) << 8);
v = ( v >> 16 ) | ( v << 16);
return v;
}
/*! bit interleave operation */
template<class T>
__forceinline T bitInterleave(const T& xin, const T& yin, const T& zin)
{
T x = xin, y = yin, z = zin;
x = (x | (x << 16)) & 0x030000FF;
x = (x | (x << 8)) & 0x0300F00F;
x = (x | (x << 4)) & 0x030C30C3;
x = (x | (x << 2)) & 0x09249249;
y = (y | (y << 16)) & 0x030000FF;
y = (y | (y << 8)) & 0x0300F00F;
y = (y | (y << 4)) & 0x030C30C3;
y = (y | (y << 2)) & 0x09249249;
z = (z | (z << 16)) & 0x030000FF;
z = (z | (z << 8)) & 0x0300F00F;
z = (z | (z << 4)) & 0x030C30C3;
z = (z | (z << 2)) & 0x09249249;
return x | (y << 1) | (z << 2);
}
/*! bit interleave operation for 64bit data types*/
template<class T>
__forceinline T bitInterleave64(const T& xin, const T& yin, const T& zin){
T x = xin & 0x1fffff;
T y = yin & 0x1fffff;
T z = zin & 0x1fffff;
x = (x | x << 32) & 0x1f00000000ffff;
x = (x | x << 16) & 0x1f0000ff0000ff;
x = (x | x << 8) & 0x100f00f00f00f00f;
x = (x | x << 4) & 0x10c30c30c30c30c3;
x = (x | x << 2) & 0x1249249249249249;
y = (y | y << 32) & 0x1f00000000ffff;
y = (y | y << 16) & 0x1f0000ff0000ff;
y = (y | y << 8) & 0x100f00f00f00f00f;
y = (y | y << 4) & 0x10c30c30c30c30c3;
y = (y | y << 2) & 0x1249249249249249;
z = (z | z << 32) & 0x1f00000000ffff;
z = (z | z << 16) & 0x1f0000ff0000ff;
z = (z | z << 8) & 0x100f00f00f00f00f;
z = (z | z << 4) & 0x10c30c30c30c30c3;
z = (z | z << 2) & 0x1249249249249249;
return x | (y << 1) | (z << 2);
}
}

View File

@ -4,7 +4,7 @@
#pragma once
#include "../sys/platform.h"
#include "../math/math.h"
#include "../math/emath.h"
namespace embree
{

View File

@ -3,7 +3,7 @@
#pragma once
#include "math.h"
#include "emath.h"
namespace embree
{
@ -34,7 +34,7 @@ namespace embree
__forceinline Vec2( const T& x, const T& y ) : x(x), y(y) {}
__forceinline Vec2( const Vec2& other ) { x = other.x; y = other.y; }
__forceinline Vec2( const Vec2fa& other );
Vec2( const Vec2fa& other );
template<typename T1> __forceinline Vec2( const Vec2<T1>& a ) : x(T(a.x)), y(T(a.y)) {}
template<typename T1> __forceinline Vec2& operator =( const Vec2<T1>& other ) { x = other.x; y = other.y; return *this; }
@ -232,4 +232,5 @@ namespace embree
#if defined(__AVX512F__)
template<> __forceinline Vec2<vfloat16>::Vec2(const Vec2fa& a) : x(a.x), y(a.y) {}
#endif
}

View File

@ -4,7 +4,12 @@
#pragma once
#include "../sys/alloc.h"
#include "math.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec2fa_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
@ -316,3 +321,5 @@ namespace embree
typedef Vec2fa Vec2fa_t;
}
#endif

View File

@ -0,0 +1,270 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
struct Vec3fa;
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec2fa Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec2fa
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 2 };
struct { float x,y; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa( ) {}
//__forceinline Vec2fa( const __m128 a ) : m128(a) {}
explicit Vec2fa(const Vec3fa& a);
__forceinline explicit Vec2fa( const vfloat<4>& a ) {
x = a[0];
y = a[1];
}
__forceinline Vec2fa ( const Vec2<float>& other ) { x = other.x; y = other.y; }
__forceinline Vec2fa& operator =( const Vec2<float>& other ) { x = other.x; y = other.y; return *this; }
__forceinline Vec2fa ( const Vec2fa& other ) { x = other.x; y = other.y; }
__forceinline Vec2fa& operator =( const Vec2fa& other ) { x = other.x; y = other.y; return *this; }
__forceinline explicit Vec2fa( const float a ) : x(a), y(a) {}
__forceinline Vec2fa( const float x, const float y) : x(x), y(y) {}
//__forceinline explicit Vec2fa( const __m128i a ) : m128(_mm_cvtepi32_ps(a)) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec2fa load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec2fa(ptr[0],ptr[1]);
}
static __forceinline Vec2fa loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec2fa(ptr[0],ptr[1]);
}
static __forceinline void storeu ( void* a, const Vec2fa& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa( ZeroTy ) : x(0.0f), y(0.0f) {}
__forceinline Vec2fa( OneTy ) : x(1.0f), y(1.0f) {}
__forceinline Vec2fa( PosInfTy ) : x(+INFINITY), y(+INFINITY) {}
__forceinline Vec2fa( NegInfTy ) : x(-INFINITY), y(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
//__forceinline const float& operator []( const size_t index ) const { assert(index < 2); return (&x)[index]; }
//__forceinline float& operator []( const size_t index ) { assert(index < 2); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa operator +( const Vec2fa& a ) { return a; }
__forceinline Vec2fa operator -( const Vec2fa& a ) { return Vec2fa(-a.x,-a.y); }
__forceinline Vec2fa abs ( const Vec2fa& a ) { return Vec2fa(sycl::fabs(a.x),sycl::fabs(a.y)); }
__forceinline Vec2fa sign ( const Vec2fa& a ) { return Vec2fa(sycl::sign(a.x),sycl::sign(a.y)); }
//__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(sycl::recip(a.x),sycl::recip(a.y)); }
__forceinline Vec2fa rcp ( const Vec2fa& a ) { return Vec2fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y)); }
__forceinline Vec2fa sqrt ( const Vec2fa& a ) { return Vec2fa(sycl::sqrt(a.x),sycl::sqrt(a.y)); }
__forceinline Vec2fa sqr ( const Vec2fa& a ) { return Vec2fa(a.x*a.x,a.y*a.y); }
__forceinline Vec2fa rsqrt( const Vec2fa& a ) { return Vec2fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y)); }
__forceinline Vec2fa zero_fix(const Vec2fa& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
return Vec2fa(x,y);
}
__forceinline Vec2fa rcp_safe(const Vec2fa& a) {
return rcp(zero_fix(a));
}
__forceinline Vec2fa log ( const Vec2fa& a ) {
return Vec2fa(sycl::log(a.x),sycl::log(a.y));
}
__forceinline Vec2fa exp ( const Vec2fa& a ) {
return Vec2fa(sycl::exp(a.x),sycl::exp(a.y));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa operator +( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x+b.x, a.y+b.y); }
__forceinline Vec2fa operator -( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x-b.x, a.y-b.y); }
__forceinline Vec2fa operator *( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x*b.x, a.y*b.y); }
__forceinline Vec2fa operator *( const Vec2fa& a, const float b ) { return a * Vec2fa(b); }
__forceinline Vec2fa operator *( const float a, const Vec2fa& b ) { return Vec2fa(a) * b; }
__forceinline Vec2fa operator /( const Vec2fa& a, const Vec2fa& b ) { return Vec2fa(a.x/b.x, a.y/b.y); }
__forceinline Vec2fa operator /( const Vec2fa& a, const float b ) { return Vec2fa(a.x/b, a.y/b); }
__forceinline Vec2fa operator /( const float a, const Vec2fa& b ) { return Vec2fa(a/b.x, a/b.y); }
__forceinline Vec2fa min( const Vec2fa& a, const Vec2fa& b ) {
return Vec2fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y));
}
__forceinline Vec2fa max( const Vec2fa& a, const Vec2fa& b ) {
return Vec2fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y));
}
/*
#if defined(__SSE4_1__)
__forceinline Vec2fa mini(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec2fa maxi(const Vec2fa& a, const Vec2fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec2fa pow ( const Vec2fa& a, const float& b ) {
return Vec2fa(powf(a.x,b),powf(a.y,b));
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa madd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y)); }
__forceinline Vec2fa msub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y)); }
__forceinline Vec2fa nmadd ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y)); }
__forceinline Vec2fa nmsub ( const Vec2fa& a, const Vec2fa& b, const Vec2fa& c) { return Vec2fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y)); }
__forceinline Vec2fa madd ( const float a, const Vec2fa& b, const Vec2fa& c) { return madd(Vec2fa(a),b,c); }
__forceinline Vec2fa msub ( const float a, const Vec2fa& b, const Vec2fa& c) { return msub(Vec2fa(a),b,c); }
__forceinline Vec2fa nmadd ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmadd(Vec2fa(a),b,c); }
__forceinline Vec2fa nmsub ( const float a, const Vec2fa& b, const Vec2fa& c) { return nmsub(Vec2fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa& operator +=( Vec2fa& a, const Vec2fa& b ) { return a = a + b; }
__forceinline Vec2fa& operator -=( Vec2fa& a, const Vec2fa& b ) { return a = a - b; }
__forceinline Vec2fa& operator *=( Vec2fa& a, const Vec2fa& b ) { return a = a * b; }
__forceinline Vec2fa& operator *=( Vec2fa& a, const float b ) { return a = a * b; }
__forceinline Vec2fa& operator /=( Vec2fa& a, const Vec2fa& b ) { return a = a / b; }
__forceinline Vec2fa& operator /=( Vec2fa& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Vec2fa& v) { return v.x+v.y; }
__forceinline float reduce_mul(const Vec2fa& v) { return v.x*v.y; }
__forceinline float reduce_min(const Vec2fa& v) { return sycl::fmin(v.x,v.y); }
__forceinline float reduce_max(const Vec2fa& v) { return sycl::fmax(v.x,v.y); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec2fa& a, const Vec2fa& b ) { return a.x == b.x && a.y == b.y; }
__forceinline bool operator !=( const Vec2fa& a, const Vec2fa& b ) { return a.x != b.x || a.y != b.y; }
////////////////////////////////////////////////////////////////////////////////
/// Euclidian Space Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline float dot ( const Vec2fa& a, const Vec2fa& b ) {
return reduce_add(a*b);
}
__forceinline Vec2fa cross ( const Vec2fa& a ) {
return Vec2fa(-a.y,a.x);
}
__forceinline float sqr_length ( const Vec2fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec2fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec2fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec2fa& a ) { return sqrt(dot(a,a)); }
__forceinline Vec2fa normalize( const Vec2fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec2fa& a, const Vec2fa& b ) { return length(a-b); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa select( bool s, const Vec2fa& t, const Vec2fa& f ) {
return Vec2fa(s ? t.x : f.x, s ? t.y : f.y);
}
__forceinline Vec2fa lerp(const Vec2fa& v0, const Vec2fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec2fa& a )
{
const Vec2fa b = abs(a);
if (b.x > b.y) return 0;
else return 1;
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec2fa trunc( const Vec2fa& a ) { return Vec2fa(sycl::trunc(a.x),sycl::trunc(a.y)); }
__forceinline Vec2fa floor( const Vec2fa& a ) { return Vec2fa(sycl::floor(a.x),sycl::floor(a.y)); }
__forceinline Vec2fa ceil ( const Vec2fa& a ) { return Vec2fa(sycl::ceil (a.x),sycl::ceil (a.y)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec2fa& a) {
return cout << "(" << a.x << ", " << a.y << ")";
}
/*template<>
__forceinline vfloat_impl<4>::vfloat_impl(const Vec2fa& a)
{
v = 0;
const unsigned int lid = get_sub_group_local_id();
if (lid == 0) v = a.x;
if (lid == 1) v = a.y;
}*/
typedef Vec2fa Vec2fa_t;
}

View File

@ -3,7 +3,7 @@
#pragma once
#include "math.h"
#include "emath.h"
namespace embree
{
@ -286,6 +286,8 @@ namespace embree
template<> __forceinline Vec3<float>::Vec3(const Vec3fa& a) { x = a.x; y = a.y; z = a.z; }
#if !defined(__SYCL_DEVICE_ONLY__)
#if defined(__AVX__)
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
@ -333,4 +335,23 @@ namespace embree
#if defined(__AVX512F__)
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) : x(a.x), y(a.y), z(a.z) {}
#endif
#else
#if defined(__SSE__)
template<> __forceinline Vec3<vfloat4>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#if defined(__AVX__)
template<> __forceinline Vec3<vfloat8>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec3<vfloat16>::Vec3(const Vec3fa& a) {
x = a.x; y = a.y; z = a.z;
}
#endif
#endif
}

View File

@ -4,7 +4,12 @@
#pragma once
#include "../sys/alloc.h"
#include "math.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3ba_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
@ -118,3 +123,5 @@ namespace embree
return cout << "(" << (a.x ? "1" : "0") << ", " << (a.y ? "1" : "0") << ", " << (a.z ? "1" : "0") << ")";
}
}
#endif

View File

@ -0,0 +1,115 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ba Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3ba
{
//ALIGNED_STRUCT_(16);
struct { bool x,y,z; };
typedef bool Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( ) {}
//__forceinline Vec3ba( const __m128 input ) : m128(input) {}
__forceinline Vec3ba( const Vec3ba& other ) : x(other.x), y(other.y), z(other.z) {}
__forceinline Vec3ba& operator =(const Vec3ba& other) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline explicit Vec3ba( bool a ) : x(a), y(a), z(a) {}
__forceinline Vec3ba( bool a, bool b, bool c) : x(a), y(b), z(c) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba( FalseTy ) : x(false), y(false), z(false) {}
__forceinline Vec3ba( TrueTy ) : x(true), y(true), z(true) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
//__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
//__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba operator !( const Vec3ba& a ) { return Vec3ba(!a.x,!a.y,!a.z); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba operator &( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x & b.x, a.y & b.y, a.z & b.z); }
__forceinline Vec3ba operator |( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x | b.x, a.y | b.y, a.z | b.z); }
__forceinline Vec3ba operator ^( const Vec3ba& a, const Vec3ba& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ba& operator &=( Vec3ba& a, const Vec3ba& b ) { return a = a & b; }
__forceinline Vec3ba& operator |=( Vec3ba& a, const Vec3ba& b ) { return a = a | b; }
__forceinline Vec3ba& operator ^=( Vec3ba& a, const Vec3ba& b ) { return a = a ^ b; }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators + Select
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3ba& a, const Vec3ba& b ) {
return a.x == b.x && a.y == b.y && a.z == b.z;
}
__forceinline bool operator !=( const Vec3ba& a, const Vec3ba& b ) {
return a.x != b.x || a.y != b.y || a.z != b.z;
}
/*
__forceinline bool operator < ( const Vec3ba& a, const Vec3ba& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Reduction Operations
////////////////////////////////////////////////////////////////////////////////
__forceinline bool reduce_and( const Vec3ba& a ) { return a.x & a.y & a.z; }
__forceinline bool reduce_or ( const Vec3ba& a ) { return a.x | a.y | a.z; }
__forceinline bool all ( const Vec3ba& b ) { return reduce_and(b); }
__forceinline bool any ( const Vec3ba& b ) { return reduce_or(b); }
__forceinline bool none ( const Vec3ba& b ) { return !reduce_or(b); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec3ba& a) {
return cout;
}
}

View File

@ -4,7 +4,12 @@
#pragma once
#include "../sys/alloc.h"
#include "math.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3fa_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
@ -441,7 +446,6 @@ namespace embree
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { m128 = _mm_set_ps(0, other.z, other.y, other.x); return *this; }
__forceinline Vec3fx ( const Vec3fx& other ) { m128 = other.m128; }
__forceinline Vec3fx& operator =( const Vec3fx& other ) { m128 = other.m128; return *this; }
__forceinline explicit Vec3fx( const float a ) : m128(_mm_set1_ps(a)) {}
@ -783,3 +787,5 @@ namespace embree
typedef Vec3fx Vec3ff;
}
#endif

View File

@ -0,0 +1,617 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fa Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3fa
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
struct { float x,y,z, do_not_use; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ) {}
//__forceinline Vec3fa( const __m128 a ) : m128(a) {}
//__forceinline explicit Vec3fa(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]) {}
__forceinline Vec3fa ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
//__forceinline Vec3fa& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline Vec3fa ( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; }
__forceinline Vec3fa& operator =( const Vec3fa& other ) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline explicit Vec3fa( const float a ) : x(a), y(a), z(a) {}
__forceinline Vec3fa( const float x, const float y, const float z) : x(x), y(y), z(z) {}
__forceinline explicit Vec3fa( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
__forceinline operator vfloat4() const { return vfloat4(x,y,z,0.0f); } // FIXME: we should not need this!!
//friend __forceinline Vec3fa copy_a( const Vec3fa& a, const Vec3fa& b ) { Vec3fa c = a; c.a = b.a; return c; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec3fa load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fa(ptr[0],ptr[1],ptr[2]);
}
static __forceinline Vec3fa loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fa(ptr[0],ptr[1],ptr[2]);
}
static __forceinline void storeu ( void* a, const Vec3fa& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f) {}
__forceinline Vec3fa( OneTy ) : x(1.0f), y(1.0f), z(1.0f) {}
__forceinline Vec3fa( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY) {}
__forceinline Vec3fa( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa operator +( const Vec3fa& a ) { return a; }
__forceinline Vec3fa operator -( const Vec3fa& a ) { return Vec3fa(-a.x,-a.y,-a.z); }
__forceinline Vec3fa abs ( const Vec3fa& a ) { return Vec3fa(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z)); }
__forceinline Vec3fa sign ( const Vec3fa& a ) { return Vec3fa(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z)); }
//__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
__forceinline Vec3fa rcp ( const Vec3fa& a ) { return Vec3fa(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z)); }
__forceinline Vec3fa sqrt ( const Vec3fa& a ) { return Vec3fa(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z)); }
__forceinline Vec3fa sqr ( const Vec3fa& a ) { return Vec3fa(a.x*a.x,a.y*a.y,a.z*a.z); }
__forceinline Vec3fa rsqrt( const Vec3fa& a ) { return Vec3fa(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z)); }
__forceinline Vec3fa zero_fix(const Vec3fa& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
return Vec3fa(x,y,z);
}
__forceinline Vec3fa rcp_safe(const Vec3fa& a) {
return rcp(zero_fix(a));
}
__forceinline Vec3fa log ( const Vec3fa& a ) {
return Vec3fa(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
}
__forceinline Vec3fa exp ( const Vec3fa& a ) {
return Vec3fa(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa operator +( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x+b.x, a.y+b.y, a.z+b.z); }
__forceinline Vec3fa operator -( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x-b.x, a.y-b.y, a.z-b.z); }
__forceinline Vec3fa operator *( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x*b.x, a.y*b.y, a.z*b.z); }
__forceinline Vec3fa operator *( const Vec3fa& a, const float b ) { return a * Vec3fa(b); }
__forceinline Vec3fa operator *( const float a, const Vec3fa& b ) { return Vec3fa(a) * b; }
__forceinline Vec3fa operator /( const Vec3fa& a, const Vec3fa& b ) { return Vec3fa(a.x/b.x, a.y/b.y, a.z/b.z); }
__forceinline Vec3fa operator /( const Vec3fa& a, const float b ) { return Vec3fa(a.x/b, a.y/b, a.z/b); }
__forceinline Vec3fa operator /( const float a, const Vec3fa& b ) { return Vec3fa(a/b.x, a/b.y, a/b.z); }
__forceinline Vec3fa min( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z));
}
__forceinline Vec3fa max( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z));
}
/*
#if defined(__SSE4_1__)
__forceinline Vec3fa mini(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec3fa maxi(const Vec3fa& a, const Vec3fa& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
*/
__forceinline Vec3fa pow ( const Vec3fa& a, const float& b ) {
return Vec3fa(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa madd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z)); }
__forceinline Vec3fa msub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z)); }
__forceinline Vec3fa nmadd ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z)); }
__forceinline Vec3fa nmsub ( const Vec3fa& a, const Vec3fa& b, const Vec3fa& c) { return Vec3fa(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z)); }
__forceinline Vec3fa madd ( const float a, const Vec3fa& b, const Vec3fa& c) { return madd(Vec3fa(a),b,c); }
__forceinline Vec3fa msub ( const float a, const Vec3fa& b, const Vec3fa& c) { return msub(Vec3fa(a),b,c); }
__forceinline Vec3fa nmadd ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmadd(Vec3fa(a),b,c); }
__forceinline Vec3fa nmsub ( const float a, const Vec3fa& b, const Vec3fa& c) { return nmsub(Vec3fa(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa& operator +=( Vec3fa& a, const Vec3fa& b ) { return a = a + b; }
__forceinline Vec3fa& operator -=( Vec3fa& a, const Vec3fa& b ) { return a = a - b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const Vec3fa& b ) { return a = a * b; }
__forceinline Vec3fa& operator *=( Vec3fa& a, const float b ) { return a = a * b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const Vec3fa& b ) { return a = a / b; }
__forceinline Vec3fa& operator /=( Vec3fa& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Vec3fa& v) { return v.x+v.y+v.z; }
__forceinline float reduce_mul(const Vec3fa& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fa& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
__forceinline float reduce_max(const Vec3fa& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3fa& a, const Vec3fa& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
__forceinline bool operator !=( const Vec3fa& a, const Vec3fa& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
__forceinline Vec3ba eq_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba neq_mask(const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
__forceinline Vec3ba lt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba le_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
__forceinline Vec3ba gt_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
__forceinline Vec3ba ge_mask( const Vec3fa& a, const Vec3fa& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
__forceinline bool isvalid ( const Vec3fa& v ) {
return all(gt_mask(v,Vec3fa(-FLT_LARGE)) & lt_mask(v,Vec3fa(+FLT_LARGE)));
}
__forceinline bool is_finite ( const Vec3fa& a ) {
return all(ge_mask(a,Vec3fa(-FLT_MAX)) & le_mask(a,Vec3fa(+FLT_MAX)));
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidian Space Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline float dot ( const Vec3fa& a, const Vec3fa& b ) {
return reduce_add(a*b);
}
__forceinline Vec3fa cross ( const Vec3fa& a, const Vec3fa& b ) {
return Vec3fa(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
}
__forceinline float sqr_length ( const Vec3fa& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fa& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fa& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fa& a ) { return sqrt(dot(a,a)); }
__forceinline Vec3fa normalize( const Vec3fa& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fa& a, const Vec3fa& b ) { return length(a-b); }
__forceinline float halfArea ( const Vec3fa& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fa& d ) { return 2.0f*halfArea(d); }
__forceinline Vec3fa normalize_safe( const Vec3fa& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fa dnormalize(const Vec3fa& p, const Vec3fa& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa select( bool s, const Vec3fa& t, const Vec3fa& f ) {
return Vec3fa(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z);
}
__forceinline Vec3fa select( const Vec3ba& s, const Vec3fa& t, const Vec3fa& f ) {
return Vec3fa(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
}
__forceinline Vec3fa lerp(const Vec3fa& v0, const Vec3fa& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec3fa& a )
{
const Vec3fa b = abs(a);
if (b.x > b.y) {
if (b.x > b.z) return 0; else return 2;
} else {
if (b.y > b.z) return 1; else return 2;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fa trunc( const Vec3fa& a ) { return Vec3fa(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z)); }
__forceinline Vec3fa floor( const Vec3fa& a ) { return Vec3fa(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z)); }
__forceinline Vec3fa ceil ( const Vec3fa& a ) { return Vec3fa(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec3fa& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
__forceinline Vec2fa::Vec2fa(const Vec3fa& a)
: x(a.x), y(a.y) {}
__forceinline Vec3ia::Vec3ia( const Vec3fa& a )
: x((int)a.x), y((int)a.y), z((int)a.z) {}
typedef Vec3fa Vec3fa_t;
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3fx Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3fx
{
//ALIGNED_STRUCT_(16);
typedef float Scalar;
enum { N = 3 };
struct { float x,y,z; union { int a; unsigned u; float w; }; };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ) {}
//__forceinline Vec3fx( const __m128 a ) : m128(a) {}
__forceinline explicit Vec3fx(const vfloat4& a) : x(a[0]), y(a[1]), z(a[2]), w(a[3]) {}
__forceinline explicit Vec3fx(const Vec3fa& v) : x(v.x), y(v.y), z(v.z), w(0.0f) {}
__forceinline operator Vec3fa() const { return Vec3fa(x,y,z); }
__forceinline explicit Vec3fx ( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; }
//__forceinline Vec3fx& operator =( const Vec3<float>& other ) { x = other.x; y = other.y; z = other.z; return *this; }
//__forceinline Vec3fx ( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; }
//__forceinline Vec3fx& operator =( const Vec3fx& other ) { *(sycl::float4*)this = *(const sycl::float4*)&other; return *this; }
__forceinline explicit Vec3fx( const float a ) : x(a), y(a), z(a), w(a) {}
__forceinline Vec3fx( const float x, const float y, const float z) : x(x), y(y), z(z), w(z) {}
__forceinline Vec3fx( const Vec3fa& other, const int a1) : x(other.x), y(other.y), z(other.z), a(a1) {}
__forceinline Vec3fx( const Vec3fa& other, const unsigned a1) : x(other.x), y(other.y), z(other.z), u(a1) {}
__forceinline Vec3fx( const Vec3fa& other, const float w1) : x(other.x), y(other.y), z(other.z), w(w1) {}
//__forceinline Vec3fx( const float x, const float y, const float z, const int a) : x(x), y(y), z(z), a(a) {} // not working properly!
//__forceinline Vec3fx( const float x, const float y, const float z, const unsigned a) : x(x), y(y), z(z), u(a) {} // not working properly!
__forceinline Vec3fx( const float x, const float y, const float z, const float w) : x(x), y(y), z(z), w(w) {}
__forceinline explicit Vec3fx( const Vec3ia& a ) : x((float)a.x), y((float)a.y), z((float)a.z), w(0.0f) {}
//__forceinline operator const __m128&() const { return m128; }
//__forceinline operator __m128&() { return m128; }
__forceinline operator vfloat4() const { return vfloat4(x,y,z,w); }
//friend __forceinline Vec3fx copy_a( const Vec3fx& a, const Vec3fx& b ) { Vec3fx c = a; c.a = b.a; return c; }
////////////////////////////////////////////////////////////////////////////////
/// Loads and Stores
////////////////////////////////////////////////////////////////////////////////
static __forceinline Vec3fx load( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
}
static __forceinline Vec3fx loadu( const void* const a ) {
const float* ptr = (const float*)a;
return Vec3fx(ptr[0],ptr[1],ptr[2],ptr[3]);
}
static __forceinline void storeu ( void* a, const Vec3fx& v ) {
float* ptr = (float*)a;
ptr[0] = v.x; ptr[1] = v.y; ptr[2] = v.z; ptr[3] = v.w;
}
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx( ZeroTy ) : x(0.0f), y(0.0f), z(0.0f), w(0.0f) {}
__forceinline Vec3fx( OneTy ) : x(1.0f), y(1.0f), z(1.0f), w(1.0f) {}
__forceinline Vec3fx( PosInfTy ) : x(+INFINITY), y(+INFINITY), z(+INFINITY), w(+INFINITY) {}
__forceinline Vec3fx( NegInfTy ) : x(-INFINITY), y(-INFINITY), z(-INFINITY), w(-INFINITY) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const float& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline float& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx operator +( const Vec3fx& a ) { return a; }
__forceinline Vec3fx operator -( const Vec3fx& a ) { return Vec3fx(-a.x,-a.y,-a.z,-a.w); }
__forceinline Vec3fx abs ( const Vec3fx& a ) { return Vec3fx(sycl::fabs(a.x),sycl::fabs(a.y),sycl::fabs(a.z),sycl::fabs(a.w)); }
__forceinline Vec3fx sign ( const Vec3fx& a ) { return Vec3fx(sycl::sign(a.x),sycl::sign(a.y),sycl::sign(a.z),sycl::sign(a.z)); }
//__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(sycl::recip(a.x),sycl::recip(a.y),sycl::recip(a.z)); }
__forceinline Vec3fx rcp ( const Vec3fx& a ) { return Vec3fx(__sycl_std::__invoke_native_recip<float>(a.x),__sycl_std::__invoke_native_recip<float>(a.y),__sycl_std::__invoke_native_recip<float>(a.z),__sycl_std::__invoke_native_recip<float>(a.w)); }
__forceinline Vec3fx sqrt ( const Vec3fx& a ) { return Vec3fx(sycl::sqrt(a.x),sycl::sqrt(a.y),sycl::sqrt(a.z),sycl::sqrt(a.w)); }
__forceinline Vec3fx sqr ( const Vec3fx& a ) { return Vec3fx(a.x*a.x,a.y*a.y,a.z*a.z,a.w*a.w); }
__forceinline Vec3fx rsqrt( const Vec3fx& a ) { return Vec3fx(sycl::rsqrt(a.x),sycl::rsqrt(a.y),sycl::rsqrt(a.z),sycl::rsqrt(a.w)); }
__forceinline Vec3fx zero_fix(const Vec3fx& a) {
const float x = sycl::fabs(a.x) < min_rcp_input ? min_rcp_input : a.x;
const float y = sycl::fabs(a.y) < min_rcp_input ? min_rcp_input : a.y;
const float z = sycl::fabs(a.z) < min_rcp_input ? min_rcp_input : a.z;
return Vec3fx(x,y,z);
}
__forceinline Vec3fx rcp_safe(const Vec3fx& a) {
return rcp(zero_fix(a));
}
__forceinline Vec3fx log ( const Vec3fx& a ) {
return Vec3fx(sycl::log(a.x),sycl::log(a.y),sycl::log(a.z));
}
__forceinline Vec3fx exp ( const Vec3fx& a ) {
return Vec3fx(sycl::exp(a.x),sycl::exp(a.y),sycl::exp(a.z));
}
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx operator +( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x+b.x, a.y+b.y, a.z+b.z, a.w+b.w); }
__forceinline Vec3fx operator -( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x-b.x, a.y-b.y, a.z-b.z, a.w-b.w); }
__forceinline Vec3fx operator *( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x*b.x, a.y*b.y, a.z*b.z, a.w*b.w); }
__forceinline Vec3fx operator *( const Vec3fx& a, const float b ) { return a * Vec3fx(b); }
__forceinline Vec3fx operator *( const float a, const Vec3fx& b ) { return Vec3fx(a) * b; }
__forceinline Vec3fx operator /( const Vec3fx& a, const Vec3fx& b ) { return Vec3fx(a.x/b.x, a.y/b.y, a.z/b.z, a.w/b.w); }
__forceinline Vec3fx operator /( const Vec3fx& a, const float b ) { return Vec3fx(a.x/b, a.y/b, a.z/b, a.w/b); }
__forceinline Vec3fx operator /( const float a, const Vec3fx& b ) { return Vec3fx(a/b.x, a/b.y, a/b.z, a/b.w); }
__forceinline Vec3fx min( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(sycl::fmin(a.x,b.x), sycl::fmin(a.y,b.y), sycl::fmin(a.z,b.z), sycl::fmin(a.w,b.w));
}
__forceinline Vec3fx max( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(sycl::fmax(a.x,b.x), sycl::fmax(a.y,b.y), sycl::fmax(a.z,b.z), sycl::fmax(a.w,b.w));
}
/*
#if defined(__SSE4_1__)
__forceinline Vec3fx mini(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_min_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
#if defined(__SSE4_1__)
__forceinline Vec3fx maxi(const Vec3fx& a, const Vec3fx& b) {
const vint4 ai = _mm_castps_si128(a);
const vint4 bi = _mm_castps_si128(b);
const vint4 ci = _mm_max_epi32(ai,bi);
return _mm_castsi128_ps(ci);
}
#endif
__forceinline Vec3fx pow ( const Vec3fx& a, const float& b ) {
return Vec3fx(powf(a.x,b),powf(a.y,b),powf(a.z,b));
}
*/
////////////////////////////////////////////////////////////////////////////////
/// Ternary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx madd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(madd(a.x,b.x,c.x), madd(a.y,b.y,c.y), madd(a.z,b.z,c.z), madd(a.w,b.w,c.w)); }
__forceinline Vec3fx msub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(msub(a.x,b.x,c.x), msub(a.y,b.y,c.y), msub(a.z,b.z,c.z), msub(a.w,b.w,c.w)); }
__forceinline Vec3fx nmadd ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmadd(a.x,b.x,c.x), nmadd(a.y,b.y,c.y), nmadd(a.z,b.z,c.z), nmadd(a.w,b.w,c.w)); }
__forceinline Vec3fx nmsub ( const Vec3fx& a, const Vec3fx& b, const Vec3fx& c) { return Vec3fx(nmsub(a.x,b.x,c.x), nmsub(a.y,b.y,c.y), nmsub(a.z,b.z,c.z), nmsub(a.w,b.w,c.w)); }
__forceinline Vec3fx madd ( const float a, const Vec3fx& b, const Vec3fx& c) { return madd(Vec3fx(a),b,c); }
__forceinline Vec3fx msub ( const float a, const Vec3fx& b, const Vec3fx& c) { return msub(Vec3fx(a),b,c); }
__forceinline Vec3fx nmadd ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmadd(Vec3fx(a),b,c); }
__forceinline Vec3fx nmsub ( const float a, const Vec3fx& b, const Vec3fx& c) { return nmsub(Vec3fx(a),b,c); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx& operator +=( Vec3fx& a, const Vec3fx& b ) { return a = a + b; }
__forceinline Vec3fx& operator -=( Vec3fx& a, const Vec3fx& b ) { return a = a - b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const Vec3fx& b ) { return a = a * b; }
__forceinline Vec3fx& operator *=( Vec3fx& a, const float b ) { return a = a * b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const Vec3fx& b ) { return a = a / b; }
__forceinline Vec3fx& operator /=( Vec3fx& a, const float b ) { return a = a / b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline float reduce_add(const Vec3fx& v) { return v.x+v.y+v.z; }
__forceinline float reduce_mul(const Vec3fx& v) { return v.x*v.y*v.z; }
__forceinline float reduce_min(const Vec3fx& v) { return sycl::fmin(sycl::fmin(v.x,v.y),v.z); }
__forceinline float reduce_max(const Vec3fx& v) { return sycl::fmax(sycl::fmax(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3fx& a, const Vec3fx& b ) { return a.x == b.x && a.y == b.y && a.z == b.z; }
__forceinline bool operator !=( const Vec3fx& a, const Vec3fx& b ) { return a.x != b.x || a.y != b.y || a.z != b.z; }
__forceinline Vec3ba eq_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba neq_mask(const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x != b.x, a.y != b.y, a.z != b.z); }
__forceinline Vec3ba lt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba le_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x <= b.x, a.y <= b.y, a.z <= b.z); }
__forceinline Vec3ba gt_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
__forceinline Vec3ba ge_mask( const Vec3fx& a, const Vec3fx& b ) { return Vec3ba(a.x >= b.x, a.y >= b.y, a.z >= b.z); }
__forceinline bool isvalid ( const Vec3fx& v ) {
return all(gt_mask(v,Vec3fx(-FLT_LARGE)) & lt_mask(v,Vec3fx(+FLT_LARGE)));
}
__forceinline bool is_finite ( const Vec3fx& a ) {
return all(ge_mask(a,Vec3fx(-FLT_MAX)) & le_mask(a,Vec3fx(+FLT_MAX)));
}
__forceinline bool isvalid4 ( const Vec3fx& v ) {
const bool valid_x = v.x >= -FLT_LARGE & v.x <= +FLT_LARGE;
const bool valid_y = v.y >= -FLT_LARGE & v.y <= +FLT_LARGE;
const bool valid_z = v.z >= -FLT_LARGE & v.z <= +FLT_LARGE;
const bool valid_w = v.w >= -FLT_LARGE & v.w <= +FLT_LARGE;
return valid_x & valid_y & valid_z & valid_w;
}
__forceinline bool is_finite4 ( const Vec3fx& v ) {
const bool finite_x = v.x >= -FLT_MAX & v.x <= +FLT_MAX;
const bool finite_y = v.y >= -FLT_MAX & v.y <= +FLT_MAX;
const bool finite_z = v.z >= -FLT_MAX & v.z <= +FLT_MAX;
const bool finite_w = v.w >= -FLT_MAX & v.w <= +FLT_MAX;
return finite_x & finite_y & finite_z & finite_w;
}
////////////////////////////////////////////////////////////////////////////////
/// Euclidian Space Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline float dot ( const Vec3fx& a, const Vec3fx& b ) {
return reduce_add(a*b);
}
__forceinline Vec3fx cross ( const Vec3fx& a, const Vec3fx& b ) {
return Vec3fx(msub(a.y,b.z,a.z*b.y), msub(a.z,b.x,a.x*b.z), msub(a.x,b.y,a.y*b.x));
}
__forceinline float sqr_length ( const Vec3fx& a ) { return dot(a,a); }
__forceinline float rcp_length ( const Vec3fx& a ) { return rsqrt(dot(a,a)); }
__forceinline float rcp_length2( const Vec3fx& a ) { return rcp(dot(a,a)); }
__forceinline float length ( const Vec3fx& a ) { return sqrt(dot(a,a)); }
__forceinline Vec3fx normalize( const Vec3fx& a ) { return a*rsqrt(dot(a,a)); }
__forceinline float distance ( const Vec3fx& a, const Vec3fx& b ) { return length(a-b); }
__forceinline float halfArea ( const Vec3fx& d ) { return madd(d.x,(d.y+d.z),d.y*d.z); }
__forceinline float area ( const Vec3fx& d ) { return 2.0f*halfArea(d); }
__forceinline Vec3fx normalize_safe( const Vec3fx& a ) {
const float d = dot(a,a); if (unlikely(d == 0.0f)) return a; else return a*rsqrt(d);
}
/*! differentiated normalization */
__forceinline Vec3fx dnormalize(const Vec3fx& p, const Vec3fx& dp)
{
const float pp = dot(p,p);
const float pdp = dot(p,dp);
return (pp*dp-pdp*p)*rcp(pp)*rsqrt(pp);
}
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx select( bool s, const Vec3fx& t, const Vec3fx& f ) {
return Vec3fx(s ? t.x : f.x, s ? t.y : f.y, s ? t.z : f.z, s ? t.w : f.w);
}
__forceinline Vec3fx select( const Vec3ba& s, const Vec3fx& t, const Vec3fx& f ) {
return Vec3fx(s.x ? t.x : f.x, s.y ? t.y : f.y, s.z ? t.z : f.z);
}
__forceinline Vec3fx lerp(const Vec3fx& v0, const Vec3fx& v1, const float t) {
return madd(1.0f-t,v0,t*v1);
}
__forceinline int maxDim ( const Vec3fx& a )
{
const Vec3fx b = abs(a);
if (b.x > b.y) {
if (b.x > b.z) return 0; else return 2;
} else {
if (b.y > b.z) return 1; else return 2;
}
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3fx trunc( const Vec3fx& a ) { return Vec3fx(sycl::trunc(a.x),sycl::trunc(a.y),sycl::trunc(a.z),sycl::trunc(a.w)); }
__forceinline Vec3fx floor( const Vec3fx& a ) { return Vec3fx(sycl::floor(a.x),sycl::floor(a.y),sycl::floor(a.z),sycl::floor(a.w)); }
__forceinline Vec3fx ceil ( const Vec3fx& a ) { return Vec3fx(sycl::ceil (a.x),sycl::ceil (a.y),sycl::ceil (a.z),sycl::ceil (a.w)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec3fx& a) {
return cout << "(" << a.x << ", " << a.y << ", " << a.z << "," << a.w << ")";
}
typedef Vec3fx Vec3ff;
//__forceinline Vec2fa::Vec2fa(const Vec3fx& a)
// : x(a.x), y(a.y) {}
//__forceinline Vec3ia::Vec3ia( const Vec3fx& a )
// : x((int)a.x), y((int)a.y), z((int)a.z) {}
}

View File

@ -4,7 +4,12 @@
#pragma once
#include "../sys/alloc.h"
#include "math.h"
#include "emath.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
# include "vec3ia_sycl.h"
#else
#include "../simd/sse.h"
namespace embree
@ -194,3 +199,5 @@ namespace embree
return cout << "(" << a.x << ", " << a.y << ", " << a.z << ")";
}
}
#endif

View File

@ -0,0 +1,178 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "../sys/alloc.h"
#include "emath.h"
#include "../simd/sse.h"
namespace embree
{
////////////////////////////////////////////////////////////////////////////////
/// SSE Vec3ia Type
////////////////////////////////////////////////////////////////////////////////
struct __aligned(16) Vec3ia
{
ALIGNED_STRUCT_(16);
struct { int x,y,z; };
typedef int Scalar;
enum { N = 3 };
////////////////////////////////////////////////////////////////////////////////
/// Constructors, Assignment & Cast Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia( ) {}
//__forceinline Vec3ia( const __m128i a ) : m128(a) {}
__forceinline Vec3ia( const Vec3ia& other ) : x(other.x), y(other.y), z(other.z) {}
__forceinline Vec3ia& operator =(const Vec3ia& other) { x = other.x; y = other.y; z = other.z; return *this; }
__forceinline explicit Vec3ia( const int a ) : x(a), y(a), z(a) {}
__forceinline Vec3ia( const int x, const int y, const int z) : x(x), y(y), z(z) {}
//__forceinline explicit Vec3ia( const __m128 a ) : m128(_mm_cvtps_epi32(a)) {}
__forceinline explicit Vec3ia(const vint4& a) : x(a[0]), y(a[1]), z(a[2]) {}
__forceinline explicit Vec3ia( const Vec3fa& a );
//__forceinline operator const __m128i&() const { return m128; }
//__forceinline operator __m128i&() { return m128; }
__forceinline operator vint4() const { return vint4(x,y,z,z); }
////////////////////////////////////////////////////////////////////////////////
/// Constants
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia( ZeroTy ) : x(0), y(0), z(0) {}
__forceinline Vec3ia( OneTy ) : x(1), y(1), z(1) {}
__forceinline Vec3ia( PosInfTy ) : x(0x7FFFFFFF), y(0x7FFFFFFF), z(0x7FFFFFFF) {}
__forceinline Vec3ia( NegInfTy ) : x(0x80000000), y(0x80000000), z(0x80000000) {}
////////////////////////////////////////////////////////////////////////////////
/// Array Access
////////////////////////////////////////////////////////////////////////////////
__forceinline const int& operator []( const size_t index ) const { assert(index < 3); return (&x)[index]; }
__forceinline int& operator []( const size_t index ) { assert(index < 3); return (&x)[index]; }
};
////////////////////////////////////////////////////////////////////////////////
/// Unary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia operator +( const Vec3ia& a ) { return Vec3ia(+a.x,+a.y,+a.z); }
__forceinline Vec3ia operator -( const Vec3ia& a ) { return Vec3ia(-a.x,-a.y,-a.z); }
__forceinline Vec3ia abs ( const Vec3ia& a ) { return Vec3ia(sycl::abs(a.x),sycl::abs(a.y),sycl::abs(a.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Binary Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia operator +( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x+b.x, a.y+b.y, a.z+b.z); }
__forceinline Vec3ia operator +( const Vec3ia& a, const int b ) { return a+Vec3ia(b); }
__forceinline Vec3ia operator +( const int a, const Vec3ia& b ) { return Vec3ia(a)+b; }
__forceinline Vec3ia operator -( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x-b.x, a.y-b.y, a.z-b.z); }
__forceinline Vec3ia operator -( const Vec3ia& a, const int b ) { return a-Vec3ia(b); }
__forceinline Vec3ia operator -( const int a, const Vec3ia& b ) { return Vec3ia(a)-b; }
__forceinline Vec3ia operator *( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x*b.x, a.y*b.y, a.z*b.z); }
__forceinline Vec3ia operator *( const Vec3ia& a, const int b ) { return a * Vec3ia(b); }
__forceinline Vec3ia operator *( const int a, const Vec3ia& b ) { return Vec3ia(a) * b; }
__forceinline Vec3ia operator &( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x&b.x, a.y&b.y, a.z&b.z); }
__forceinline Vec3ia operator &( const Vec3ia& a, const int b ) { return a & Vec3ia(b); }
__forceinline Vec3ia operator &( const int a, const Vec3ia& b ) { return Vec3ia(a) & b; }
__forceinline Vec3ia operator |( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x|b.x, a.y|b.y, a.z|b.z); }
__forceinline Vec3ia operator |( const Vec3ia& a, const int b ) { return a | Vec3ia(b); }
__forceinline Vec3ia operator |( const int a, const Vec3ia& b ) { return Vec3ia(a) | b; }
__forceinline Vec3ia operator ^( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(a.x^b.x, a.y^b.y, a.z^b.z); }
__forceinline Vec3ia operator ^( const Vec3ia& a, const int b ) { return a ^ Vec3ia(b); }
__forceinline Vec3ia operator ^( const int a, const Vec3ia& b ) { return Vec3ia(a) ^ b; }
__forceinline Vec3ia operator <<( const Vec3ia& a, const int n ) { return Vec3ia(a.x<<n, a.y<<n, a.z<<n); }
__forceinline Vec3ia operator >>( const Vec3ia& a, const int n ) { return Vec3ia(a.x>>n, a.y>>n, a.z>>n); }
__forceinline Vec3ia sll ( const Vec3ia& a, const int b ) { return Vec3ia(a.x<<b, a.y<<b, a.z<<b); }
__forceinline Vec3ia sra ( const Vec3ia& a, const int b ) { return Vec3ia(a.x>>b, a.y>>b, a.z>>b); }
__forceinline Vec3ia srl ( const Vec3ia& a, const int b ) { return Vec3ia(unsigned(a.x)>>b, unsigned(a.y)>>b, unsigned(a.z)>>b); }
////////////////////////////////////////////////////////////////////////////////
/// Assignment Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia& operator +=( Vec3ia& a, const Vec3ia& b ) { return a = a + b; }
__forceinline Vec3ia& operator +=( Vec3ia& a, const int& b ) { return a = a + b; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const Vec3ia& b ) { return a = a - b; }
__forceinline Vec3ia& operator -=( Vec3ia& a, const int& b ) { return a = a - b; }
__forceinline Vec3ia& operator *=( Vec3ia& a, const Vec3ia& b ) { return a = a * b; }
__forceinline Vec3ia& operator *=( Vec3ia& a, const int& b ) { return a = a * b; }
__forceinline Vec3ia& operator &=( Vec3ia& a, const Vec3ia& b ) { return a = a & b; }
__forceinline Vec3ia& operator &=( Vec3ia& a, const int& b ) { return a = a & b; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const Vec3ia& b ) { return a = a | b; }
__forceinline Vec3ia& operator |=( Vec3ia& a, const int& b ) { return a = a | b; }
__forceinline Vec3ia& operator <<=( Vec3ia& a, const int& b ) { return a = a << b; }
__forceinline Vec3ia& operator >>=( Vec3ia& a, const int& b ) { return a = a >> b; }
////////////////////////////////////////////////////////////////////////////////
/// Reductions
////////////////////////////////////////////////////////////////////////////////
__forceinline int reduce_add(const Vec3ia& v) { return v.x+v.y+v.z; }
__forceinline int reduce_mul(const Vec3ia& v) { return v.x*v.y*v.z; }
__forceinline int reduce_min(const Vec3ia& v) { return sycl::min(sycl::min(v.x,v.y),v.z); }
__forceinline int reduce_max(const Vec3ia& v) { return sycl::max(sycl::max(v.x,v.y),v.z); }
////////////////////////////////////////////////////////////////////////////////
/// Comparison Operators
////////////////////////////////////////////////////////////////////////////////
__forceinline bool operator ==( const Vec3ia& a, const Vec3ia& b ) { return a.x == b.x & a.y == b.y & a.z == b.z; }
__forceinline bool operator !=( const Vec3ia& a, const Vec3ia& b ) { return a.x != b.x & a.y != b.y & a.z != b.z; }
/*
__forceinline bool operator < ( const Vec3ia& a, const Vec3ia& b ) {
if (a.x != b.x) return a.x < b.x;
if (a.y != b.y) return a.y < b.y;
if (a.z != b.z) return a.z < b.z;
return false;
}
*/
__forceinline Vec3ba eq_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x == b.x, a.y == b.y, a.z == b.z); }
__forceinline Vec3ba lt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x < b.x, a.y < b.y, a.z < b.z); }
__forceinline Vec3ba gt_mask( const Vec3ia& a, const Vec3ia& b ) { return Vec3ba(a.x > b.x, a.y > b.y, a.z > b.z); }
////////////////////////////////////////////////////////////////////////////////
/// Select
////////////////////////////////////////////////////////////////////////////////
__forceinline Vec3ia select( const Vec3ba& m, const Vec3ia& t, const Vec3ia& f ) {
const int x = m.x ? t.x : f.x;
const int y = m.y ? t.y : f.y;
const int z = m.z ? t.z : f.z;
return Vec3ia(x,y,z);
}
__forceinline Vec3ia min( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::min(a.x,b.x), sycl::min(a.y,b.y), sycl::min(a.z,b.z)); }
__forceinline Vec3ia max( const Vec3ia& a, const Vec3ia& b ) { return Vec3ia(sycl::max(a.x,b.x), sycl::max(a.y,b.y), sycl::max(a.z,b.z)); }
////////////////////////////////////////////////////////////////////////////////
/// Output Operators
////////////////////////////////////////////////////////////////////////////////
inline embree_ostream operator<<(embree_ostream cout, const Vec3ia& a) {
return cout;
}
}

View File

@ -3,7 +3,7 @@
#pragma once
#include "math.h"
#include "emath.h"
#include "vec3.h"
namespace embree
@ -221,6 +221,8 @@ namespace embree
{
template<> __forceinline Vec4<float>::Vec4( const Vec3fx& a ) { x = a.x; y = a.y; z = a.z; w = a.w; }
#if !defined(__SYCL_DEVICE_ONLY__)
#if defined(__AVX__)
template<> __forceinline Vec4<vfloat4>::Vec4( const Vec3fx& a ) {
x = a.x; y = a.y; z = a.z; w = a.w;
@ -240,4 +242,25 @@ namespace embree
#if defined(__AVX512F__)
template<> __forceinline Vec4<vfloat16>::Vec4( const Vec3fx& a ) : x(a.x), y(a.y), z(a.z), w(a.w) {}
#endif
#else
#if defined(__SSE__)
template<> __forceinline Vec4<vfloat4>::Vec4(const Vec3fx& a) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#endif
#if defined(__AVX__)
template<> __forceinline Vec4<vfloat8>::Vec4(const Vec3fx& a) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#endif
#if defined(__AVX512F__)
template<> __forceinline Vec4<vfloat16>::Vec4(const Vec3fx& a) {
x = a.x; y = a.y; z = a.z; w = a.w;
}
#endif
#endif
}

View File

@ -34,6 +34,7 @@ __forceinline __m128 _mm_broadcast_ss (float const * mem_addr)
#define _MM_SET_EXCEPTION_MASK(x)
// #define _MM_SET_FLUSH_ZERO_MODE(x)
/*
__forceinline int _mm_getcsr()
{
return 0;
@ -43,6 +44,7 @@ __forceinline void _mm_mfence()
{
__sync_synchronize();
}
*/
__forceinline __m128i _mm_load4epu8_epi32(__m128i *ptr)
{

File diff suppressed because it is too large Load Diff

View File

@ -31,4 +31,3 @@
#if defined(__AVX512F__)
#include "avx512.h"
#endif

View File

@ -3,7 +3,7 @@
#pragma once
#include "../math/math.h"
#include "../math/emath.h"
/* include SSE wrapper classes */
#if defined(__SSE__) || defined(__ARM_NEON)

View File

@ -119,7 +119,7 @@ namespace embree
#if defined(__aarch64__)
template<int i0, int i1, int i2, int i3>
__forceinline vboolf4 shuffle(const vboolf4& v) {
return vreinterpretq_f32_u8(vqtbl1q_u8( vreinterpretq_u8_s32(v), _MN_SHUFFLE(i0, i1, i2, i3)));
return vreinterpretq_f32_u8(vqtbl1q_u8( vreinterpretq_u8_s32((int32x4_t)v.v), _MN_SHUFFLE(i0, i1, i2, i3)));
}
template<int i0, int i1, int i2, int i3>

View File

@ -316,6 +316,17 @@ namespace embree
return madd(t,b-a,a);
}
__forceinline bool isvalid (const vfloat16& v) {
return all((v > vfloat16(-FLT_LARGE)) & (v < vfloat16(+FLT_LARGE)));
}
__forceinline void xchg(vboolf16 m, vfloat16& a, vfloat16& b)
{
vfloat16 c = a;
a = select(m,b,a);
b = select(m,c,b);
}
////////////////////////////////////////////////////////////////////////////////
/// Rounding Functions
////////////////////////////////////////////////////////////////////////////////

View File

@ -32,6 +32,8 @@ namespace embree
__forceinline vfloat() {}
__forceinline vfloat(const vfloat4& other) { v = other.v; }
//__forceinline vfloat(const vfloat4& other) = default;
__forceinline vfloat4& operator =(const vfloat4& other) { v = other.v; return *this; }
__forceinline vfloat(__m128 a) : v(a) {}

View File

@ -3,7 +3,7 @@
#pragma once
#include "../math/math.h"
#include "../math/emath.h"
#define vboolf vboolf_impl
#define vboold vboold_impl

View File

@ -3,7 +3,7 @@
#pragma once
#include "../math/math.h"
#include "../math/emath.h"
#define vboolf vboolf_impl
#define vboold vboold_impl

View File

@ -12,6 +12,75 @@
namespace embree
{
size_t total_allocations = 0;
#if defined(EMBREE_SYCL_SUPPORT)
__thread sycl::context* tls_context_tutorial = nullptr;
__thread sycl::device* tls_device_tutorial = nullptr;
__thread sycl::context* tls_context_embree = nullptr;
__thread sycl::device* tls_device_embree = nullptr;
void enableUSMAllocEmbree(sycl::context* context, sycl::device* device)
{
// -- GODOT start --
// if (tls_context_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
// if (tls_device_embree != nullptr) throw std::runtime_error("USM allocation already enabled");
if (tls_context_embree != nullptr) {
abort();
}
if (tls_device_embree != nullptr) {
abort();
}
// -- GODOT end --
tls_context_embree = context;
tls_device_embree = device;
}
void disableUSMAllocEmbree()
{
// -- GODOT start --
// if (tls_context_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
// if (tls_device_embree == nullptr) throw std::runtime_error("USM allocation not enabled");
if (tls_context_embree == nullptr) {
abort();
}
if (tls_device_embree == nullptr) {
abort();
}
// -- GODOT end --
tls_context_embree = nullptr;
tls_device_embree = nullptr;
}
void enableUSMAllocTutorial(sycl::context* context, sycl::device* device)
{
//if (tls_context_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
//if (tls_device_tutorial != nullptr) throw std::runtime_error("USM allocation already enabled");
tls_context_tutorial = context;
tls_device_tutorial = device;
}
void disableUSMAllocTutorial()
{
// -- GODOT start --
// if (tls_context_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
// if (tls_device_tutorial == nullptr) throw std::runtime_error("USM allocation not enabled");
if (tls_context_tutorial == nullptr) {
abort();
}
if (tls_device_tutorial == nullptr) {
abort();
}
// -- GODOT end --
tls_context_tutorial = nullptr;
tls_device_tutorial = nullptr;
}
#endif
void* alignedMalloc(size_t size, size_t align)
{
if (size == 0)
@ -19,13 +88,13 @@ namespace embree
assert((align & (align-1)) == 0);
void* ptr = _mm_malloc(size,align);
if (size != 0 && ptr == nullptr)
// -- GODOT start --
// if (size != 0 && ptr == nullptr)
// throw std::bad_alloc();
if (size != 0 && ptr == nullptr) {
abort();
}
// -- GODOT end --
return ptr;
}
@ -35,6 +104,81 @@ namespace embree
_mm_free(ptr);
}
#if defined(EMBREE_SYCL_SUPPORT)
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode)
{
assert(context);
assert(device);
if (size == 0)
return nullptr;
assert((align & (align-1)) == 0);
total_allocations++;
void* ptr = nullptr;
if (mode == EMBREE_USM_SHARED_DEVICE_READ_ONLY)
ptr = sycl::aligned_alloc_shared(align,size,*device,*context,sycl::ext::oneapi::property::usm::device_read_only());
else
ptr = sycl::aligned_alloc_shared(align,size,*device,*context);
// -- GODOT start --
// if (size != 0 && ptr == nullptr)
// throw std::bad_alloc();
if (size != 0 && ptr == nullptr) {
abort();
}
// -- GODOT end --
return ptr;
}
static MutexSys g_alloc_mutex;
void* alignedSYCLMalloc(size_t size, size_t align, EmbreeUSMMode mode)
{
if (tls_context_tutorial) return alignedSYCLMalloc(tls_context_tutorial, tls_device_tutorial, size, align, mode);
if (tls_context_embree ) return alignedSYCLMalloc(tls_context_embree, tls_device_embree, size, align, mode);
return nullptr;
}
void alignedSYCLFree(sycl::context* context, void* ptr)
{
assert(context);
if (ptr) {
sycl::free(ptr,*context);
}
}
void alignedSYCLFree(void* ptr)
{
if (tls_context_tutorial) return alignedSYCLFree(tls_context_tutorial, ptr);
if (tls_context_embree ) return alignedSYCLFree(tls_context_embree, ptr);
}
#endif
void* alignedUSMMalloc(size_t size, size_t align, EmbreeUSMMode mode)
{
#if defined(EMBREE_SYCL_SUPPORT)
if (tls_context_embree || tls_context_tutorial)
return alignedSYCLMalloc(size,align,mode);
else
#endif
return alignedMalloc(size,align);
}
void alignedUSMFree(void* ptr)
{
#if defined(EMBREE_SYCL_SUPPORT)
if (tls_context_embree || tls_context_tutorial)
return alignedSYCLFree(ptr);
else
#endif
return alignedFree(ptr);
}
static bool huge_pages_enabled = false;
static MutexSys os_init_mutex;
@ -133,7 +277,9 @@ namespace embree
char* ptr = (char*) VirtualAlloc(nullptr,bytes,flags,PAGE_READWRITE);
// -- GODOT start --
// if (ptr == nullptr) throw std::bad_alloc();
if (ptr == nullptr) abort();
if (ptr == nullptr) {
abort();
}
// -- GODOT end --
hugepages = false;
return ptr;
@ -150,10 +296,12 @@ namespace embree
if (bytesNew >= bytesOld)
return bytesOld;
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
// -- GODOT start --
// if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT))
// throw std::bad_alloc();
if (!VirtualFree((char*)ptr+bytesNew,bytesOld-bytesNew,MEM_DECOMMIT)) {
abort();
}
// -- GODOT end --
return bytesNew;
@ -164,10 +312,12 @@ namespace embree
if (bytes == 0)
return;
if (!VirtualFree(ptr,0,MEM_RELEASE))
// -- GODOT start --
// if (!VirtualFree(ptr,0,MEM_RELEASE))
// throw std::bad_alloc();
if (!VirtualFree(ptr,0,MEM_RELEASE)) {
abort();
}
// -- GODOT end --
}
@ -274,7 +424,9 @@ namespace embree
void* ptr = (char*) mmap(0, bytes, PROT_READ | PROT_WRITE, MAP_PRIVATE | MAP_ANON, -1, 0);
// -- GODOT start --
// if (ptr == MAP_FAILED) throw std::bad_alloc();
if (ptr == MAP_FAILED) abort();
if (ptr == MAP_FAILED) {
abort();
}
// -- GODOT end --
hugepages = false;
@ -291,10 +443,12 @@ namespace embree
if (bytesNew >= bytesOld)
return bytesOld;
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
// -- GODOT start --
// if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1)
// throw std::bad_alloc();
if (munmap((char*)ptr+bytesNew,bytesOld-bytesNew) == -1) {
abort();
}
// -- GODOT end --
return bytesNew;
@ -308,10 +462,12 @@ namespace embree
/* for hugepages we need to also align the size */
const size_t pageSize = hugepages ? PAGE_SIZE_2M : PAGE_SIZE_4K;
bytes = (bytes+pageSize-1) & ~(pageSize-1);
if (munmap(ptr,bytes) == -1)
// -- GODOT start --
// if (munmap(ptr,bytes) == -1)
// throw std::bad_alloc();
if (munmap(ptr,bytes) == -1) {
abort();
}
// -- GODOT end --
}

View File

@ -9,21 +9,73 @@
namespace embree
{
#if defined(EMBREE_SYCL_SUPPORT)
/* enables SYCL USM allocation */
void enableUSMAllocEmbree(sycl::context* context, sycl::device* device);
void enableUSMAllocTutorial(sycl::context* context, sycl::device* device);
/* disables SYCL USM allocation */
void disableUSMAllocEmbree();
void disableUSMAllocTutorial();
#endif
#define ALIGNED_STRUCT_(align) \
void* operator new(size_t size) { return alignedMalloc(size,align); } \
void operator delete(void* ptr) { alignedFree(ptr); } \
void* operator new[](size_t size) { return alignedMalloc(size,align); } \
void operator delete[](void* ptr) { alignedFree(ptr); }
#define ALIGNED_STRUCT_USM_(align) \
void* operator new(size_t size) { return alignedUSMMalloc(size,align); } \
void operator delete(void* ptr) { alignedUSMFree(ptr); } \
void* operator new[](size_t size) { return alignedUSMMalloc(size,align); } \
void operator delete[](void* ptr) { alignedUSMFree(ptr); }
#define ALIGNED_CLASS_(align) \
public: \
ALIGNED_STRUCT_(align) \
private:
#define ALIGNED_CLASS_USM_(align) \
public: \
ALIGNED_STRUCT_USM_(align) \
private:
enum EmbreeUSMMode {
EMBREE_USM_SHARED = 0,
EMBREE_USM_SHARED_DEVICE_READ_WRITE = 0,
EMBREE_USM_SHARED_DEVICE_READ_ONLY = 1
};
/*! aligned allocation */
void* alignedMalloc(size_t size, size_t align);
void alignedFree(void* ptr);
/*! aligned allocation using SYCL USM */
void* alignedUSMMalloc(size_t size, size_t align = 16, EmbreeUSMMode mode = EMBREE_USM_SHARED_DEVICE_READ_ONLY);
void alignedUSMFree(void* ptr);
#if defined(EMBREE_SYCL_SUPPORT)
/*! aligned allocation using SYCL USM */
void* alignedSYCLMalloc(sycl::context* context, sycl::device* device, size_t size, size_t align, EmbreeUSMMode mode);
void alignedSYCLFree(sycl::context* context, void* ptr);
// deleter functor to use as deleter in std unique or shared pointers that
// capture raw pointers created by sycl::malloc and it's variants
template<typename T>
struct sycl_deleter
{
void operator()(T const* ptr)
{
alignedUSMFree((void*)ptr);
}
};
#endif
/*! allocator that performs aligned allocations */
template<typename T, size_t alignment>
struct aligned_allocator
@ -95,6 +147,37 @@ namespace embree
bool hugepages;
};
/*! allocator that newer performs allocations */
template<typename T>
struct no_allocator
{
typedef T value_type;
typedef T* pointer;
typedef const T* const_pointer;
typedef T& reference;
typedef const T& const_reference;
typedef std::size_t size_type;
typedef std::ptrdiff_t difference_type;
__forceinline pointer allocate( size_type n ) {
// -- GODOT start --
// throw std::runtime_error("no allocation supported");
abort();
// -- GODOT end --
}
__forceinline void deallocate( pointer p, size_type n ) {
}
__forceinline void construct( pointer p, const_reference val ) {
new (p) T(val);
}
__forceinline void destroy( pointer p ) {
p->~T();
}
};
/*! allocator for IDs */
template<typename T, size_t max_id>
struct IDPool

View File

@ -36,7 +36,7 @@ namespace embree
};
template<typename T>
__forceinline void atomic_min(std::atomic<T>& aref, const T& bref)
__forceinline void _atomic_min(std::atomic<T>& aref, const T& bref)
{
const T b = bref.load();
while (true) {
@ -47,7 +47,7 @@ namespace embree
}
template<typename T>
__forceinline void atomic_max(std::atomic<T>& aref, const T& bref)
__forceinline void _atomic_max(std::atomic<T>& aref, const T& bref)
{
const T b = bref.load();
while (true) {

View File

@ -34,7 +34,7 @@ namespace embree
void* opaque;
};
/*! fast active barrier using atomitc counter */
/*! fast active barrier using atomic counter */
struct BarrierActive
{
public:

View File

@ -1,7 +1,7 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#include "string.h"
#include "estring.h"
#include <algorithm>
#include <ctype.h>

View File

@ -28,6 +28,42 @@ namespace embree
std::streamsize precision;
};
struct IndentOStream : public std::streambuf
{
explicit IndentOStream(std::ostream &ostream, int indent = 2)
: streambuf(ostream.rdbuf())
, start_of_line(true)
, ident_str(indent, ' ')
, stream(&ostream)
{
// set streambuf of ostream to this and save original streambuf
stream->rdbuf(this);
}
virtual ~IndentOStream()
{
if (stream != NULL) {
// restore old streambuf
stream->rdbuf(streambuf);
}
}
protected:
virtual int overflow(int ch) {
if (start_of_line && ch != '\n') {
streambuf->sputn(ident_str.data(), ident_str.size());
}
start_of_line = ch == '\n';
return streambuf->sputc(ch);
}
private:
std::streambuf *streambuf;
bool start_of_line;
std::string ident_str;
std::ostream *stream;
};
std::string toLowerCase(const std::string& s);
std::string toUpperCase(const std::string& s);

View File

@ -64,7 +64,7 @@ namespace embree
/// Windows Platform
////////////////////////////////////////////////////////////////////////////////
#if defined(__WIN32__)
#if defined(__WIN32__) && !defined(__INTEL_LLVM_COMPILER)
__forceinline size_t read_tsc()
{
@ -89,7 +89,7 @@ namespace embree
#endif
}
#if defined(__X86_64__)
#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bsf(size_t v) {
#if defined(__AVX2__)
return _tzcnt_u64(v);
@ -113,7 +113,7 @@ namespace embree
return i;
}
#if defined(__X86_64__)
#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bscf(size_t& v)
{
size_t i = bsf(v);
@ -138,7 +138,7 @@ namespace embree
#endif
}
#if defined(__X86_64__)
#if defined(__X86_64__) || defined (__aarch64__)
__forceinline size_t bsr(size_t v) {
#if defined(__AVX2__)
return 63 -_lzcnt_u64(v);
@ -196,49 +196,6 @@ namespace embree
#else
#if defined(__i386__) && defined(__PIC__)
__forceinline void __cpuid(int out[4], int op)
{
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
"cpuid\n\t"
"xchg{l}\t{%%}ebx, %1\n\t"
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
: "0"(op));
}
__forceinline void __cpuid_count(int out[4], int op1, int op2)
{
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
"cpuid\n\t"
"xchg{l}\t{%%}ebx, %1\n\t"
: "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3])
: "0" (op1), "2" (op2));
}
#elif defined(__X86_ASM__)
__forceinline void __cpuid(int out[4], int op) {
#if defined(__ARM_NEON)
if (op == 0) { // Get CPU name
out[0] = 0x41524d20;
out[1] = 0x41524d20;
out[2] = 0x41524d20;
out[3] = 0x41524d20;
}
#else
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
#endif
}
#if !defined(__ARM_NEON)
__forceinline void __cpuid_count(int out[4], int op1, int op2) {
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
}
#endif
#endif
__forceinline uint64_t read_tsc() {
#if defined(__X86_ASM__)
uint32_t high,low;
@ -264,6 +221,13 @@ namespace embree
#endif
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
__forceinline unsigned int bsf(unsigned v) {
return sycl::ctz(v);
}
#else
#if defined(__64BIT__)
__forceinline unsigned bsf(unsigned v)
{
@ -280,6 +244,13 @@ namespace embree
#endif
}
#endif
#endif
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
__forceinline size_t bsf(size_t v) {
return sycl::ctz(v);
}
#else
__forceinline size_t bsf(size_t v) {
#if defined(__AVX2__) && !defined(__aarch64__)
@ -294,6 +265,7 @@ namespace embree
return __builtin_ctzl(v);
#endif
}
#endif
__forceinline int bscf(int& v)
{
@ -434,6 +406,41 @@ namespace embree
#endif
#if !defined(__WIN32__)
#if defined(__i386__) && defined(__PIC__)
__forceinline void __cpuid(int out[4], int op)
{
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
"cpuid\n\t"
"xchg{l}\t{%%}ebx, %1\n\t"
: "=a"(out[0]), "=r"(out[1]), "=c"(out[2]), "=d"(out[3])
: "0"(op));
}
__forceinline void __cpuid_count(int out[4], int op1, int op2)
{
asm volatile ("xchg{l}\t{%%}ebx, %1\n\t"
"cpuid\n\t"
"xchg{l}\t{%%}ebx, %1\n\t"
: "=a" (out[0]), "=r" (out[1]), "=c" (out[2]), "=d" (out[3])
: "0" (op1), "2" (op2));
}
#elif defined(__X86_ASM__)
__forceinline void __cpuid(int out[4], int op) {
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op));
}
__forceinline void __cpuid_count(int out[4], int op1, int op2) {
asm volatile ("cpuid" : "=a"(out[0]), "=b"(out[1]), "=c"(out[2]), "=d"(out[3]) : "a"(op1), "c"(op2));
}
#endif
#endif
////////////////////////////////////////////////////////////////////////////////
/// All Platforms
////////////////////////////////////////////////////////////////////////////////
@ -459,6 +466,14 @@ namespace embree
#endif
#endif
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
__forceinline unsigned int popcnt(unsigned int in) {
return sycl::popcount(in);
}
#else
#if defined(__SSE4_2__) || defined(__ARM_NEON)
__forceinline int popcnt(int in) {
@ -477,6 +492,8 @@ namespace embree
#endif
#endif
#if defined(__X86_ASM__)
__forceinline uint64_t rdtsc()
{

View File

@ -87,7 +87,7 @@ namespace embree
class PaddedSpinLock : public SpinLock
{
private:
char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
MAYBE_UNUSED char padding[CPU_CACHELINE_SIZE - sizeof(SpinLock)];
};
/*! safe mutex lock and unlock helper */
template<typename Mutex> class Lock {

View File

@ -3,7 +3,9 @@
#pragma once
#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include <cstddef>
#include <cassert>
@ -18,6 +20,30 @@
#include <cstring>
#include <stdint.h>
#include <functional>
#include <mutex>
#if defined(EMBREE_SYCL_SUPPORT)
#define __SYCL_USE_NON_VARIADIC_SPIRV_OCL_PRINTF__
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wdeprecated-declarations"
#pragma clang diagnostic ignored "-W#pragma-messages"
#include <sycl/sycl.hpp>
#pragma clang diagnostic pop
#include "sycl.h"
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
#define CONSTANT __attribute__((opencl_constant))
#else
#define CONSTANT
#endif
#endif
////////////////////////////////////////////////////////////////////////////////
/// detect platform
@ -115,7 +141,7 @@
#else
#define __restrict__ //__restrict // causes issues with MSVC
#endif
#if !defined(__thread)
#if !defined(__thread) && !defined(__INTEL_LLVM_COMPILER)
#define __thread __declspec(thread)
#endif
#if !defined(__aligned)
@ -148,6 +174,10 @@
#define MAYBE_UNUSED
#endif
#if !defined(_unused)
#define _unused(x) ((void)(x))
#endif
#if defined(_MSC_VER) && (_MSC_VER < 1900) // before VS2015 deleted functions are not supported properly
#define DELETED
#else
@ -155,7 +185,7 @@
#endif
#if !defined(likely)
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER)
#if defined(_MSC_VER) && !defined(__INTEL_COMPILER) || defined(__SYCL_DEVICE_ONLY__)
#define likely(expr) (expr)
#define unlikely(expr) (expr)
#else
@ -171,22 +201,27 @@
/* debug printing macros */
#define STRING(x) #x
#define TOSTRING(x) STRING(x)
#define PING embree_cout << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl
#define PING embree_cout_uniform << __FILE__ << " (" << __LINE__ << "): " << __FUNCTION__ << embree_endl
#define PRINT(x) embree_cout << STRING(x) << " = " << (x) << embree_endl
#define PRINT2(x,y) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
#define PRINT3(x,y,z) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
#define PRINT4(x,y,z,w) embree_cout << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
#define UPRINT(x) embree_cout_uniform << STRING(x) << " = " << (x) << embree_endl
#define UPRINT2(x,y) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << embree_endl
#define UPRINT3(x,y,z) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << embree_endl
#define UPRINT4(x,y,z,w) embree_cout_uniform << STRING(x) << " = " << (x) << ", " << STRING(y) << " = " << (y) << ", " << STRING(z) << " = " << (z) << ", " << STRING(w) << " = " << (w) << embree_endl
#if defined(DEBUG) // only report file and line in debug mode
// -- GODOT start --
// #define THROW_RUNTIME_ERROR(str)
// #define THROW_RUNTIME_ERROR(str) \
// throw std::runtime_error(std::string(__FILE__) + " (" + toString(__LINE__) + "): " + std::string(str));
#define THROW_RUNTIME_ERROR(str) \
printf("%s (%d): %s", __FILE__, __LINE__, std::string(str).c_str()), abort();
// -- GODOT end --
#else
// -- GODOT start --
// #define THROW_RUNTIME_ERROR(str)
// #define THROW_RUNTIME_ERROR(str) \
// throw std::runtime_error(str);
#define THROW_RUNTIME_ERROR(str) \
abort();
@ -324,12 +359,208 @@ __forceinline std::string toString(long long value) {
#define ENABLE_DEPRECATED_WARNING __pragma(warning (enable : 4996)) // warning: function was declared deprecated
#endif
/* embree output stream */
////////////////////////////////////////////////////////////////////////////////
/// SYCL specific
////////////////////////////////////////////////////////////////////////////////
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
#define sycl_printf0(format, ...) { \
static const CONSTANT char fmt[] = format; \
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
}
#define sycl_printf0_(format) { \
static const CONSTANT char fmt[] = format; \
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true))) \
sycl::ext::oneapi::experimental::printf(fmt); \
}
#else
#define sycl_printf0(format, ...) { \
static const CONSTANT char fmt[] = format; \
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
}
#define sycl_printf0_(format) { \
static const CONSTANT char fmt[] = format; \
sycl::ext::oneapi::experimental::printf(fmt); \
}
#endif
#define sycl_printf(format, ...) { \
static const CONSTANT char fmt[] = format; \
sycl::ext::oneapi::experimental::printf(fmt, __VA_ARGS__ ); \
}
#define sycl_printf_(format) { \
static const CONSTANT char fmt[] = format; \
sycl::ext::oneapi::experimental::printf(fmt); \
}
#if defined(EMBREE_SYCL_SUPPORT) && defined(__SYCL_DEVICE_ONLY__)
namespace embree
{
struct sycl_ostream_ {
sycl_ostream_ (bool uniform) : uniform(uniform) {}
bool uniform = false;
};
struct sycl_endl_ {};
#define embree_ostream embree::sycl_ostream_
#define embree_cout embree::sycl_ostream_(false)
#define embree_cout_uniform embree::sycl_ostream_(true)
#define embree_endl embree::sycl_endl_()
inline sycl_ostream_ operator <<(sycl_ostream_ cout, int i)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%i",i);
}
else
sycl_printf("%i ",i);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, unsigned int i)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%u",i);
} else
sycl_printf("%u ",i);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, float f)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%f",f);
} else
sycl_printf("%f ",f);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, double d)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%f",d);
} else
sycl_printf("%f ",d);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, uint64_t l)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%lu",l);
} else
sycl_printf("%lu ",l);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, long l)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%l",l);
} else
sycl_printf("%l ",l);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, void* p)
{
if (cout.uniform) {
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%p",p);
} else
sycl_printf("%p ",p);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, const char* c)
{
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf("%s",c);
return cout;
}
inline sycl_ostream_ operator <<(sycl_ostream_ cout, sycl_endl_)
{
if (get_sub_group_local_id() == sycl::ctz(intel_sub_group_ballot(true)))
sycl_printf_("\n");
return cout;
}
}
#else
#define embree_ostream std::ostream&
#define embree_cout std::cout
#define embree_cout_uniform std::cout
#define embree_endl std::endl
#endif
#if defined(EMBREE_SYCL_SUPPORT)
/* printing out sycle vector types */
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float4& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float3& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::float2& v) {
return out << "(" << v.x() << "," << v.y() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int4& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int3& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::int2& v) {
return out << "(" << v.x() << "," << v.y() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint4& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << "," << v.w() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint3& v) {
return out << "(" << v.x() << "," << v.y() << "," << v.z() << ")";
}
__forceinline embree_ostream operator<<(embree_ostream out, const sycl::uint2& v) {
return out << "(" << v.x() << "," << v.y() << ")";
}
#endif
inline void tab(std::ostream& cout, int n) {
for (int i=0; i<n; i++) cout << " ";
}
inline std::string tab(int depth) {
return std::string(2*depth,' ');
}
////////////////////////////////////////////////////////////////////////////////
/// Some macros for static profiling
////////////////////////////////////////////////////////////////////////////////

307
thirdparty/embree/common/sys/sycl.h vendored Normal file
View File

@ -0,0 +1,307 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "platform.h"
using sycl::float16;
using sycl::float8;
using sycl::float4;
using sycl::float3;
using sycl::float2;
using sycl::int16;
using sycl::int8;
using sycl::int4;
using sycl::int3;
using sycl::int2;
using sycl::uint16;
using sycl::uint8;
using sycl::uint4;
using sycl::uint3;
using sycl::uint2;
using sycl::uchar16;
using sycl::uchar8;
using sycl::uchar4;
using sycl::uchar3;
using sycl::uchar2;
using sycl::ushort16;
using sycl::ushort8;
using sycl::ushort4;
using sycl::ushort3;
using sycl::ushort2;
#ifdef __SYCL_DEVICE_ONLY__
#define GLOBAL __attribute__((opencl_global))
#define LOCAL __attribute__((opencl_local))
SYCL_EXTERNAL extern int work_group_reduce_add(int x);
SYCL_EXTERNAL extern float work_group_reduce_min(float x);
SYCL_EXTERNAL extern float work_group_reduce_max(float x);
SYCL_EXTERNAL extern float atomic_min(volatile GLOBAL float *p, float val);
SYCL_EXTERNAL extern float atomic_min(volatile LOCAL float *p, float val);
SYCL_EXTERNAL extern float atomic_max(volatile GLOBAL float *p, float val);
SYCL_EXTERNAL extern float atomic_max(volatile LOCAL float *p, float val);
SYCL_EXTERNAL extern "C" unsigned int intel_sub_group_ballot(bool valid);
SYCL_EXTERNAL extern "C" void __builtin_IB_assume_uniform(void *p);
// Load message caching control
enum LSC_LDCC {
LSC_LDCC_DEFAULT,
LSC_LDCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
LSC_LDCC_L1UC_L3C, // Override to L1 uncached and L3 cached
LSC_LDCC_L1C_L3UC, // Override to L1 cached and L3 uncached
LSC_LDCC_L1C_L3C, // Override to L1 cached and L3 cached
LSC_LDCC_L1S_L3UC, // Override to L1 streaming load and L3 uncached
LSC_LDCC_L1S_L3C, // Override to L1 streaming load and L3 cached
LSC_LDCC_L1IAR_L3C, // Override to L1 invalidate-after-read, and L3 cached
};
// Store message caching control (also used for atomics)
enum LSC_STCC {
LSC_STCC_DEFAULT,
LSC_STCC_L1UC_L3UC, // Override to L1 uncached and L3 uncached
LSC_STCC_L1UC_L3WB, // Override to L1 uncached and L3 written back
LSC_STCC_L1WT_L3UC, // Override to L1 written through and L3 uncached
LSC_STCC_L1WT_L3WB, // Override to L1 written through and L3 written back
LSC_STCC_L1S_L3UC, // Override to L1 streaming and L3 uncached
LSC_STCC_L1S_L3WB, // Override to L1 streaming and L3 written back
LSC_STCC_L1WB_L3WB, // Override to L1 written through and L3 written back
};
///////////////////////////////////////////////////////////////////////
// LSC Loads
///////////////////////////////////////////////////////////////////////
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uchar_to_uint (const GLOBAL uint8_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D8U32
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_ushort_to_uint(const GLOBAL uint16_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D16U32
SYCL_EXTERNAL /* extern "C" */ uint32_t __builtin_IB_lsc_load_global_uint (const GLOBAL uint32_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V1
SYCL_EXTERNAL /* extern "C" */ sycl::uint2 __builtin_IB_lsc_load_global_uint2 (const GLOBAL sycl::uint2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V2
SYCL_EXTERNAL /* extern "C" */ sycl::uint3 __builtin_IB_lsc_load_global_uint3 (const GLOBAL sycl::uint3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V3
SYCL_EXTERNAL /* extern "C" */ sycl::uint4 __builtin_IB_lsc_load_global_uint4 (const GLOBAL sycl::uint4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V4
SYCL_EXTERNAL /* extern "C" */ sycl::uint8 __builtin_IB_lsc_load_global_uint8 (const GLOBAL sycl::uint8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D32V8
SYCL_EXTERNAL /* extern "C" */ uint64_t __builtin_IB_lsc_load_global_ulong (const GLOBAL uint64_t *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V1
SYCL_EXTERNAL /* extern "C" */ sycl::ulong2 __builtin_IB_lsc_load_global_ulong2 (const GLOBAL sycl::ulong2 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V2
SYCL_EXTERNAL /* extern "C" */ sycl::ulong3 __builtin_IB_lsc_load_global_ulong3 (const GLOBAL sycl::ulong3 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V3
SYCL_EXTERNAL /* extern "C" */ sycl::ulong4 __builtin_IB_lsc_load_global_ulong4 (const GLOBAL sycl::ulong4 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V4
SYCL_EXTERNAL /* extern "C" */ sycl::ulong8 __builtin_IB_lsc_load_global_ulong8 (const GLOBAL sycl::ulong8 *base, int elemOff, enum LSC_LDCC cacheOpt); //D64V8
// global address space
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uchar_from_uint (GLOBAL uint8_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D8U32
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ushort_from_uint(GLOBAL uint16_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D16U32
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint (GLOBAL uint32_t *base, int immElemOff, uint32_t val, enum LSC_STCC cacheOpt); //D32V1
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint2 (GLOBAL sycl::uint2 *base, int immElemOff, sycl::uint2 val, enum LSC_STCC cacheOpt); //D32V2
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint3 (GLOBAL sycl::uint3 *base, int immElemOff, sycl::uint3 val, enum LSC_STCC cacheOpt); //D32V3
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint4 (GLOBAL sycl::uint4 *base, int immElemOff, sycl::uint4 val, enum LSC_STCC cacheOpt); //D32V4
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_uint8 (GLOBAL sycl::uint8 *base, int immElemOff, sycl::uint8 val, enum LSC_STCC cacheOpt); //D32V8
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong (GLOBAL uint64_t *base, int immElemOff, uint64_t val, enum LSC_STCC cacheOpt); //D64V1
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong2 (GLOBAL sycl::ulong2 *base, int immElemOff, sycl::ulong2 val, enum LSC_STCC cacheOpt); //D64V2
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong3 (GLOBAL sycl::ulong3 *base, int immElemOff, sycl::ulong3 val, enum LSC_STCC cacheOpt); //D64V3
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong4 (GLOBAL sycl::ulong4 *base, int immElemOff, sycl::ulong4 val, enum LSC_STCC cacheOpt); //D64V4
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_store_global_ulong8 (GLOBAL sycl::ulong8 *base, int immElemOff, sycl::ulong8 val, enum LSC_STCC cacheOpt); //D64V8
///////////////////////////////////////////////////////////////////////
// prefetching
///////////////////////////////////////////////////////////////////////
//
// LSC Pre-Fetch Load functions with CacheControls
// global address space
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uchar (const GLOBAL uint8_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D8U32
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ushort(const GLOBAL uint16_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D16U32
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint (const GLOBAL uint32_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V1
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint2 (const GLOBAL sycl::uint2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V2
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint3 (const GLOBAL sycl::uint3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V3
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint4 (const GLOBAL sycl::uint4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V4
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_uint8 (const GLOBAL sycl::uint8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D32V8
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong (const GLOBAL uint64_t *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V1
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong2(const GLOBAL sycl::ulong2 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V2
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong3(const GLOBAL sycl::ulong3 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V3
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong4(const GLOBAL sycl::ulong4 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V4
SYCL_EXTERNAL extern "C" void __builtin_IB_lsc_prefetch_global_ulong8(const GLOBAL sycl::ulong8 *base, int immElemOff, enum LSC_LDCC cacheOpt); //D64V8
#else
#define GLOBAL
#define LOCAL
/* dummy functions for host */
inline int work_group_reduce_add(int x) { return x; }
inline float work_group_reduce_min(float x) { return x; }
inline float work_group_reduce_max(float x) { return x; }
inline float atomic_min(volatile float *p, float val) { return val; };
inline float atomic_max(volatile float *p, float val) { return val; };
inline uint32_t intel_sub_group_ballot(bool valid) { return 0; }
#endif
/* creates a temporary that is enforced to be uniform */
#define SYCL_UNIFORM_VAR(Ty,tmp,k) \
Ty tmp##_data; \
Ty* p##tmp##_data = (Ty*) sub_group_broadcast((uint64_t)&tmp##_data,k); \
Ty& tmp = *p##tmp##_data;
#if !defined(__forceinline)
#define __forceinline inline __attribute__((always_inline))
#endif
#if __SYCL_COMPILER_VERSION < 20210801
#define all_of_group all_of
#define any_of_group any_of
#define none_of_group none_of
#define group_broadcast broadcast
#define reduce_over_group reduce
#define exclusive_scan_over_group exclusive_scan
#define inclusive_scan_over_group inclusive_scan
#endif
namespace embree
{
template<typename T>
__forceinline T cselect(const bool mask, const T &a, const T &b)
{
return sycl::select(b,a,(int)mask);
}
template<typename T, typename M>
__forceinline T cselect(const M &mask, const T &a, const T &b)
{
return sycl::select(b,a,mask);
}
__forceinline const sycl::sub_group this_sub_group() {
return sycl::ext::oneapi::experimental::this_sub_group();
}
__forceinline const uint32_t get_sub_group_local_id() {
return this_sub_group().get_local_id()[0];
}
__forceinline const uint32_t get_sub_group_size() {
return this_sub_group().get_max_local_range().size();
}
__forceinline const uint32_t get_sub_group_id() {
return this_sub_group().get_group_id()[0];
}
__forceinline const uint32_t get_num_sub_groups() {
return this_sub_group().get_group_range().size();
}
__forceinline uint32_t sub_group_ballot(bool pred) {
return intel_sub_group_ballot(pred);
}
__forceinline bool sub_group_all_of(bool pred) {
return sycl::all_of_group(this_sub_group(),pred);
}
__forceinline bool sub_group_any_of(bool pred) {
return sycl::any_of_group(this_sub_group(),pred);
}
__forceinline bool sub_group_none_of(bool pred) {
return sycl::none_of_group(this_sub_group(),pred);
}
template <typename T> __forceinline T sub_group_broadcast(T x, sycl::id<1> local_id) {
return sycl::group_broadcast<sycl::sub_group>(this_sub_group(),x,local_id);
}
template <typename T> __forceinline T sub_group_make_uniform(T x) {
return sub_group_broadcast(x,sycl::ctz(intel_sub_group_ballot(true)));
}
__forceinline void assume_uniform_array(void* ptr) {
#ifdef __SYCL_DEVICE_ONLY__
__builtin_IB_assume_uniform(ptr);
#endif
}
template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, BinaryOperation binary_op) {
return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,binary_op);
}
template <typename T, class BinaryOperation> __forceinline T sub_group_reduce(T x, T init, BinaryOperation binary_op) {
return sycl::reduce_over_group<sycl::sub_group>(this_sub_group(),x,init,binary_op);
}
template <typename T> __forceinline T sub_group_reduce_min(T x, T init) {
return sub_group_reduce(x, init, sycl::ext::oneapi::minimum<T>());
}
template <typename T> __forceinline T sub_group_reduce_min(T x) {
return sub_group_reduce(x, sycl::ext::oneapi::minimum<T>());
}
template <typename T> __forceinline T sub_group_reduce_max(T x) {
return sub_group_reduce(x, sycl::ext::oneapi::maximum<T>());
}
template <typename T> __forceinline T sub_group_reduce_add(T x) {
return sub_group_reduce(x, sycl::ext::oneapi::plus<T>());
}
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, BinaryOperation binary_op) {
return sycl::exclusive_scan_over_group(this_sub_group(),x,binary_op);
}
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan_min(T x) {
return sub_group_exclusive_scan(x,sycl::ext::oneapi::minimum<T>());
}
template <typename T, class BinaryOperation> __forceinline T sub_group_exclusive_scan(T x, T init, BinaryOperation binary_op) {
return sycl::exclusive_scan_over_group(this_sub_group(),x,init,binary_op);
}
template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op) {
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op);
}
template <typename T, class BinaryOperation> __forceinline T sub_group_inclusive_scan(T x, BinaryOperation binary_op, T init) {
return sycl::inclusive_scan_over_group(this_sub_group(),x,binary_op,init);
}
template <typename T> __forceinline T sub_group_shuffle(T x, sycl::id<1> local_id) {
return this_sub_group().shuffle(x, local_id);
}
template <typename T> __forceinline T sub_group_shuffle_down(T x, uint32_t delta) {
return this_sub_group().shuffle_down(x, delta);
}
template <typename T> __forceinline T sub_group_shuffle_up(T x, uint32_t delta) {
return this_sub_group().shuffle_up(x, delta);
}
template <typename T> __forceinline T sub_group_load(const void* src) {
return this_sub_group().load(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)src));
}
template <typename T> __forceinline void sub_group_store(void* dst, const T& x) {
this_sub_group().store(sycl::multi_ptr<T,sycl::access::address_space::global_space>((T*)dst),x);
}
}
#if __SYCL_COMPILER_VERSION < 20210801
#undef all_of_group
#undef any_of_group
#undef none_of_group
#undef group_broadcast
#undef reduce_over_group
#undef exclusive_scan_over_group
#undef inclusive_scan_over_group
#endif

View File

@ -1,9 +1,15 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#if defined(__INTEL_LLVM_COMPILER)
// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wignored-attributes"
#endif
#include "sysinfo.h"
#include "intrinsics.h"
#include "string.h"
#include "estring.h"
#include "ref.h"
#if defined(__FREEBSD__)
#include <sys/cpuset.h>
@ -690,3 +696,6 @@ namespace embree
}
#endif
#if defined(__INTEL_LLVM_COMPILER)
#pragma clang diagnostic pop
#endif

View File

@ -3,7 +3,7 @@
#include "thread.h"
#include "sysinfo.h"
#include "string.h"
#include "estring.h"
#include <iostream>
#if defined(__ARM_NEON)

View File

@ -8,6 +8,8 @@
namespace embree
{
class Device;
template<typename T, typename allocator>
class vector_t
{
@ -26,6 +28,12 @@ namespace embree
__forceinline explicit vector_t (M alloc, size_t sz)
: alloc(alloc), size_active(0), size_alloced(0), items(nullptr) { internal_resize_init(sz); }
__forceinline vector_t (Device* alloc)
: vector_t(alloc,0) {}
__forceinline vector_t(void* data, size_t bytes)
: size_active(0), size_alloced(bytes/sizeof(T)), items((T*)data) {}
__forceinline ~vector_t() {
clear();
}
@ -65,6 +73,10 @@ namespace embree
return *this;
}
__forceinline allocator& getAlloc() {
return alloc;
}
/********************** Iterators ****************************/
__forceinline iterator begin() { return items; };
@ -215,6 +227,10 @@ namespace embree
if (new_alloced <= size_alloced)
return size_alloced;
/* if current size is 0 allocate exact requested size */
if (size_alloced == 0)
return new_alloced;
/* resize to next power of 2 otherwise */
size_t new_size_alloced = size_alloced;
while (new_size_alloced < new_alloced) {
@ -241,4 +257,8 @@ namespace embree
/*! vector class that performs OS allocations */
template<typename T>
using ovector = vector_t<T,os_allocator<T> >;
/*! vector class with externally managed data buffer */
template<typename T>
using evector = vector_t<T,no_allocator<T>>;
}

View File

@ -3,7 +3,7 @@
#pragma once
#if defined(TASKING_INTERNAL)
#if defined(TASKING_INTERNAL) && !defined(TASKING_TBB)
# include "taskschedulerinternal.h"
#elif defined(TASKING_TBB)
# include "taskschedulertbb.h"

View File

@ -2,7 +2,7 @@
// SPDX-License-Identifier: Apache-2.0
#include "taskschedulerinternal.h"
#include "../math/math.h"
#include "../math/emath.h"
#include "../sys/sysinfo.h"
#include <algorithm>
@ -50,11 +50,11 @@ namespace embree
thread.task = this;
// -- GODOT start --
// try {
// if (thread.scheduler->cancellingException == nullptr)
// if (context->cancellingException == nullptr)
closure->execute();
// } catch (...) {
// if (thread.scheduler->cancellingException == nullptr)
// thread.scheduler->cancellingException = std::current_exception();
// if (context->cancellingException == nullptr)
// context->cancellingException = std::current_exception();
// }
// -- GODOT end --
thread.task = prevTask;
@ -152,7 +152,8 @@ namespace embree
{
Lock<MutexSys> lock(g_mutex);
assert(newNumThreads);
newNumThreads = min(newNumThreads, (size_t) getNumberOfLogicalThreads());
if (newNumThreads == std::numeric_limits<size_t>::max())
newNumThreads = (size_t) getNumberOfLogicalThreads();
numThreads = newNumThreads;
if (!startThreads && !running) return;
@ -232,7 +233,8 @@ namespace embree
TaskScheduler::TaskScheduler()
: threadCounter(0), anyTasksRunning(0), hasRootTask(false)
{
threadLocal.resize(2*getNumberOfLogicalThreads()); // FIXME: this has to be 2x as in the compatibility join mode with rtcCommitScene the worker threads also join. When disallowing rtcCommitScene to join a build we can remove the 2x.
assert(threadPool);
threadLocal.resize(2 * TaskScheduler::threadCount()); // FIXME: this has to be 2x as in the compatibility join mode with rtcCommitScene the worker threads also join. When disallowing rtcCommitScene to join a build we can remove the 2x.
for (size_t i=0; i<threadLocal.size(); i++)
threadLocal[i].store(nullptr);
}
@ -293,11 +295,7 @@ namespace embree
size_t threadIndex = allocThreadIndex();
condition.wait(mutex, [&] () { return hasRootTask.load(); });
mutex.unlock();
// -- GODOT start --
// std::exception_ptr except = thread_loop(threadIndex);
// if (except != nullptr) std::rethrow_exception(except);
thread_loop(threadIndex);
// -- GODOT end --
}
void TaskScheduler::reset() {
@ -321,18 +319,15 @@ namespace embree
return old;
}
dll_export bool TaskScheduler::wait()
dll_export void TaskScheduler::wait()
{
Thread* thread = TaskScheduler::thread();
if (thread == nullptr) return true;
if (thread == nullptr)
return;
while (thread->tasks.execute_local_internal(*thread,thread->task)) {};
return thread->scheduler->cancellingException == nullptr;
}
// -- GODOT start --
// std::exception_ptr TaskScheduler::thread_loop(size_t threadIndex)
void TaskScheduler::thread_loop(size_t threadIndex)
// -- GODOT end --
{
/* allocate thread structure */
std::unique_ptr<Thread> mthread(new Thread(threadIndex,this)); // too large for stack allocation
@ -354,11 +349,6 @@ namespace embree
threadLocal[threadIndex].store(nullptr);
swapThread(oldThread);
/* remember exception to throw */
// -- GODOT start --
// std::exception_ptr except = nullptr;
// if (cancellingException != nullptr) except = cancellingException;
// -- GODOT end --
/* wait for all threads to terminate */
threadCounter--;
#if defined(__WIN32__)
@ -376,10 +366,6 @@ namespace embree
yield();
#endif
}
// -- GODOT start --
// return except;
return;
// -- GODOT end --
}
bool TaskScheduler::steal_from_other_threads(Thread& thread)

View File

@ -12,7 +12,7 @@
#include "../sys/ref.h"
#include "../sys/atomic.h"
#include "../math/range.h"
#include "../../include/embree3/rtcore.h"
#include "../../include/embree4/rtcore.h"
#include <list>
@ -38,6 +38,13 @@ namespace embree
virtual void execute() = 0;
};
struct TaskGroupContext {
TaskGroupContext() : cancellingException(nullptr) {}
std::exception_ptr cancellingException;
};
/*! builds a task interface from a closure */
template<typename Closure>
struct ClosureTaskFunction : public TaskFunction
@ -76,16 +83,16 @@ namespace embree
: state(DONE) {}
/*! construction of new task */
__forceinline Task (TaskFunction* closure, Task* parent, size_t stackPtr, size_t N)
: dependencies(1), stealable(true), closure(closure), parent(parent), stackPtr(stackPtr), N(N)
__forceinline Task (TaskFunction* closure, Task* parent, TaskGroupContext* context, size_t stackPtr, size_t N)
: dependencies(1), stealable(true), closure(closure), parent(parent), context(context), stackPtr(stackPtr), N(N)
{
if (parent) parent->add_dependencies(+1);
switch_state(DONE,INITIALIZED);
}
/*! construction of stolen task, stealing thread will decrement initial dependency */
__forceinline Task (TaskFunction* closure, Task* parent)
: dependencies(1), stealable(false), closure(closure), parent(parent), stackPtr(-1), N(1)
__forceinline Task (TaskFunction* closure, Task* parent, TaskGroupContext* context)
: dependencies(1), stealable(false), closure(closure), parent(parent), context(context), stackPtr(-1), N(1)
{
switch_state(DONE,INITIALIZED);
}
@ -95,7 +102,7 @@ namespace embree
{
if (!stealable) return false;
if (!try_switch_state(INITIALIZED,DONE)) return false;
new (&child) Task(closure, this);
new (&child) Task(closure, this, context);
return true;
}
@ -110,6 +117,7 @@ namespace embree
std::atomic<bool> stealable; //!< true if task can be stolen
TaskFunction* closure; //!< the closure to execute
Task* parent; //!< parent task to signal when we are finished
TaskGroupContext* context;
size_t stackPtr; //!< stack location where closure is stored
size_t N; //!< approximative size of task
};
@ -122,28 +130,32 @@ namespace embree
__forceinline void* alloc(size_t bytes, size_t align = 64)
{
size_t ofs = bytes + ((align - stackPtr) & (align-1));
if (stackPtr + ofs > CLOSURE_STACK_SIZE)
// -- GODOT start --
// if (stackPtr + ofs > CLOSURE_STACK_SIZE)
// throw std::runtime_error("closure stack overflow");
if (stackPtr + ofs > CLOSURE_STACK_SIZE) {
abort();
}
// -- GODOT end --
stackPtr += ofs;
return &stack[stackPtr-bytes];
}
template<typename Closure>
__forceinline void push_right(Thread& thread, const size_t size, const Closure& closure)
__forceinline void push_right(Thread& thread, const size_t size, const Closure& closure, TaskGroupContext* context)
{
if (right >= TASK_STACK_SIZE)
// -- GODOT start --
// if (right >= TASK_STACK_SIZE)
// throw std::runtime_error("task stack overflow");
if (right >= TASK_STACK_SIZE) {
abort();
}
// -- GODOT end --
/* allocate new task on right side of stack */
size_t oldStackPtr = stackPtr;
TaskFunction* func = new (alloc(sizeof(ClosureTaskFunction<Closure>))) ClosureTaskFunction<Closure>(closure);
new (&(tasks[right.load()])) Task(func,thread.task,oldStackPtr,size);
new (&tasks[right.load()]) Task(func,thread.task,context,oldStackPtr,size);
right++;
/* also move left pointer */
@ -244,10 +256,7 @@ namespace embree
void wait_for_threads(size_t threadCount);
/*! thread loop for all worker threads */
// -- GODOT start --
// std::exception_ptr thread_loop(size_t threadIndex);
void thread_loop(size_t threadIndex);
// -- GODOT end --
/*! steals a task from a different thread */
bool steal_from_other_threads(Thread& thread);
@ -257,7 +266,7 @@ namespace embree
/* spawn a new task at the top of the threads task stack */
template<typename Closure>
void spawn_root(const Closure& closure, size_t size = 1, bool useThreadPool = true)
void spawn_root(const Closure& closure, TaskGroupContext* context, size_t size = 1, bool useThreadPool = true)
{
if (useThreadPool) startThreads();
@ -267,7 +276,7 @@ namespace embree
assert(threadLocal[threadIndex].load() == nullptr);
threadLocal[threadIndex] = &thread;
Thread* oldThread = swapThread(&thread);
thread.tasks.push_right(thread,size,closure);
thread.tasks.push_right(thread,size,closure,context);
{
Lock<MutexSys> lock(mutex);
anyTasksRunning++;
@ -286,36 +295,37 @@ namespace embree
/* remember exception to throw */
std::exception_ptr except = nullptr;
if (cancellingException != nullptr) except = cancellingException;
if (context->cancellingException != nullptr) except = context->cancellingException;
/* wait for all threads to terminate */
threadCounter--;
while (threadCounter > 0) yield();
cancellingException = nullptr;
context->cancellingException = nullptr;
/* re-throw proper exception */
if (except != nullptr)
if (except != nullptr) {
std::rethrow_exception(except);
}
/* spawn a new task at the top of the threads task stack */
template<typename Closure>
static __forceinline void spawn(size_t size, const Closure& closure)
{
Thread* thread = TaskScheduler::thread();
if (likely(thread != nullptr)) thread->tasks.push_right(*thread,size,closure);
else instance()->spawn_root(closure,size);
}
/* spawn a new task at the top of the threads task stack */
template<typename Closure>
static __forceinline void spawn(const Closure& closure) {
spawn(1,closure);
static __forceinline void spawn(size_t size, const Closure& closure, TaskGroupContext* context)
{
Thread* thread = TaskScheduler::thread();
if (likely(thread != nullptr)) thread->tasks.push_right(*thread,size,closure,context);
else instance()->spawn_root(closure,context,size);
}
/* spawn a new task at the top of the threads task stack */
template<typename Closure>
static __forceinline void spawn(const Closure& closure, TaskGroupContext* taskGroupContext) {
spawn(1,closure,taskGroupContext);
}
/* spawn a new task set */
template<typename Index, typename Closure>
static void spawn(const Index begin, const Index end, const Index blockSize, const Closure& closure)
static void spawn(const Index begin, const Index end, const Index blockSize, const Closure& closure, TaskGroupContext* context)
{
spawn(end-begin, [=]()
{
@ -323,14 +333,14 @@ namespace embree
return closure(range<Index>(begin,end));
}
const Index center = (begin+end)/2;
spawn(begin,center,blockSize,closure);
spawn(center,end ,blockSize,closure);
spawn(begin,center,blockSize,closure,context);
spawn(center,end ,blockSize,closure,context);
wait();
});
},context);
}
/* work on spawned subtasks and wait until all have finished */
dll_export static bool wait();
dll_export static void wait();
/* returns the ID of the current thread */
dll_export static size_t threadID();
@ -366,7 +376,6 @@ namespace embree
std::atomic<size_t> threadCounter;
std::atomic<size_t> anyTasksRunning;
std::atomic<bool> hasRootTask;
std::exception_ptr cancellingException;
MutexSys mutex;
ConditionSys condition;

View File

@ -15,6 +15,12 @@
# define NOMINMAX
#endif
#if defined(__INTEL_LLVM_COMPILER)
// prevents "'__thiscall' calling convention is not supported for this target" warning from TBB
#pragma clang diagnostic push
#pragma clang diagnostic ignored "-Wignored-attributes"
#endif
// We need to define these to avoid implicit linkage against
// tbb_debug.lib under Windows. When removing these lines debug build
// under Windows fails.
@ -25,6 +31,18 @@
#include "tbb/tbb.h"
#include "tbb/parallel_sort.h"
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION_MAJOR >= 8)
# define USE_TASK_ARENA 1
#else
# define USE_TASK_ARENA 0
#endif
#if defined(TASKING_TBB) && (TBB_INTERFACE_VERSION >= 11009) // TBB 2019 Update 9
# define TASKING_TBB_USE_TASK_ISOLATION 1
#else
# define TASKING_TBB_USE_TASK_ISOLATION 0
#endif
namespace embree
{
struct TaskScheduler
@ -65,3 +83,7 @@ namespace embree
};
};
#if defined(__INTEL_LLVM_COMPILER)
#pragma clang diagnostic pop
#endif

View File

@ -1,163 +0,0 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "rtcore_device.h"
RTC_NAMESPACE_BEGIN
/* Forward declarations for ray structures */
struct RTCRayHit;
struct RTCRayHit4;
struct RTCRayHit8;
struct RTCRayHit16;
struct RTCRayHitNp;
/* Scene flags */
enum RTCSceneFlags
{
RTC_SCENE_FLAG_NONE = 0,
RTC_SCENE_FLAG_DYNAMIC = (1 << 0),
RTC_SCENE_FLAG_COMPACT = (1 << 1),
RTC_SCENE_FLAG_ROBUST = (1 << 2),
RTC_SCENE_FLAG_CONTEXT_FILTER_FUNCTION = (1 << 3)
};
/* Creates a new scene. */
RTC_API RTCScene rtcNewScene(RTCDevice device);
/* Returns the device the scene got created in. The reference count of
* the device is incremented by this function. */
RTC_API RTCDevice rtcGetSceneDevice(RTCScene hscene);
/* Retains the scene (increments the reference count). */
RTC_API void rtcRetainScene(RTCScene scene);
/* Releases the scene (decrements the reference count). */
RTC_API void rtcReleaseScene(RTCScene scene);
/* Attaches the geometry to a scene. */
RTC_API unsigned int rtcAttachGeometry(RTCScene scene, RTCGeometry geometry);
/* Attaches the geometry to a scene using the specified geometry ID. */
RTC_API void rtcAttachGeometryByID(RTCScene scene, RTCGeometry geometry, unsigned int geomID);
/* Detaches the geometry from the scene. */
RTC_API void rtcDetachGeometry(RTCScene scene, unsigned int geomID);
/* Gets a geometry handle from the scene. This function is not thread safe and should get used during rendering. */
RTC_API RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID);
/* Gets a geometry handle from the scene. This function is thread safe and should NOT get used during rendering. */
RTC_API RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID);
/* Commits the scene. */
RTC_API void rtcCommitScene(RTCScene scene);
/* Commits the scene from multiple threads. */
RTC_API void rtcJoinCommitScene(RTCScene scene);
/* Progress monitor callback function */
typedef bool (*RTCProgressMonitorFunction)(void* ptr, double n);
/* Sets the progress monitor callback function of the scene. */
RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene scene, RTCProgressMonitorFunction progress, void* ptr);
/* Sets the build quality of the scene. */
RTC_API void rtcSetSceneBuildQuality(RTCScene scene, enum RTCBuildQuality quality);
/* Sets the scene flags. */
RTC_API void rtcSetSceneFlags(RTCScene scene, enum RTCSceneFlags flags);
/* Returns the scene flags. */
RTC_API enum RTCSceneFlags rtcGetSceneFlags(RTCScene scene);
/* Returns the axis-aligned bounds of the scene. */
RTC_API void rtcGetSceneBounds(RTCScene scene, struct RTCBounds* bounds_o);
/* Returns the linear axis-aligned bounds of the scene. */
RTC_API void rtcGetSceneLinearBounds(RTCScene scene, struct RTCLinearBounds* bounds_o);
/* Perform a closest point query of the scene. */
RTC_API bool rtcPointQuery(RTCScene scene, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcPointQuery4(const int* valid, RTCScene scene, struct RTCPointQuery4* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcPointQuery8(const int* valid, RTCScene scene, struct RTCPointQuery8* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcPointQuery16(const int* valid, RTCScene scene, struct RTCPointQuery16* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Intersects a single ray with the scene. */
RTC_API void rtcIntersect1(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit* rayhit);
/* Intersects a packet of 4 rays with the scene. */
RTC_API void rtcIntersect4(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit4* rayhit);
/* Intersects a packet of 8 rays with the scene. */
RTC_API void rtcIntersect8(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit8* rayhit);
/* Intersects a packet of 16 rays with the scene. */
RTC_API void rtcIntersect16(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit16* rayhit);
/* Intersects a stream of M rays with the scene. */
RTC_API void rtcIntersect1M(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit* rayhit, unsigned int M, size_t byteStride);
/* Intersects a stream of pointers to M rays with the scene. */
RTC_API void rtcIntersect1Mp(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHit** rayhit, unsigned int M);
/* Intersects a stream of M ray packets of size N in SOA format with the scene. */
RTC_API void rtcIntersectNM(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayHitN* rayhit, unsigned int N, unsigned int M, size_t byteStride);
/* Intersects a stream of M ray packets of size N in SOA format with the scene. */
RTC_API void rtcIntersectNp(RTCScene scene, struct RTCIntersectContext* context, const struct RTCRayHitNp* rayhit, unsigned int N);
/* Tests a single ray for occlusion with the scene. */
RTC_API void rtcOccluded1(RTCScene scene, struct RTCIntersectContext* context, struct RTCRay* ray);
/* Tests a packet of 4 rays for occlusion occluded with the scene. */
RTC_API void rtcOccluded4(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRay4* ray);
/* Tests a packet of 8 rays for occlusion with the scene. */
RTC_API void rtcOccluded8(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRay8* ray);
/* Tests a packet of 16 rays for occlusion with the scene. */
RTC_API void rtcOccluded16(const int* valid, RTCScene scene, struct RTCIntersectContext* context, struct RTCRay16* ray);
/* Tests a stream of M rays for occlusion with the scene. */
RTC_API void rtcOccluded1M(RTCScene scene, struct RTCIntersectContext* context, struct RTCRay* ray, unsigned int M, size_t byteStride);
/* Tests a stream of pointers to M rays for occlusion with the scene. */
RTC_API void rtcOccluded1Mp(RTCScene scene, struct RTCIntersectContext* context, struct RTCRay** ray, unsigned int M);
/* Tests a stream of M ray packets of size N in SOA format for occlusion with the scene. */
RTC_API void rtcOccludedNM(RTCScene scene, struct RTCIntersectContext* context, struct RTCRayN* ray, unsigned int N, unsigned int M, size_t byteStride);
/* Tests a stream of M ray packets of size N in SOA format for occlusion with the scene. */
RTC_API void rtcOccludedNp(RTCScene scene, struct RTCIntersectContext* context, const struct RTCRayNp* ray, unsigned int N);
/*! collision callback */
struct RTCCollision { unsigned int geomID0; unsigned int primID0; unsigned int geomID1; unsigned int primID1; };
typedef void (*RTCCollideFunc) (void* userPtr, struct RTCCollision* collisions, unsigned int num_collisions);
/*! Performs collision detection of two scenes */
RTC_API void rtcCollide (RTCScene scene0, RTCScene scene1, RTCCollideFunc callback, void* userPtr);
#if defined(__cplusplus)
/* Helper for easily combining scene flags */
inline RTCSceneFlags operator|(RTCSceneFlags a, RTCSceneFlags b) {
return (RTCSceneFlags)((size_t)a | (size_t)b);
}
#endif
RTC_NAMESPACE_END

View File

@ -27,6 +27,8 @@ enum RTCBufferType
RTC_BUFFER_TYPE_VERTEX_CREASE_WEIGHT = 21,
RTC_BUFFER_TYPE_HOLE = 22,
RTC_BUFFER_TYPE_TRANSFORM = 23,
RTC_BUFFER_TYPE_FLAGS = 32
};

View File

@ -12,7 +12,7 @@
RTC_NAMESPACE_BEGIN
#if defined(_WIN32)
#if defined(_M_X64)
#if defined(_M_X64) || defined(_M_ARM64)
typedef long long ssize_t;
#else
typedef int ssize_t;
@ -41,6 +41,12 @@ typedef int ssize_t;
# define RTC_FORCEINLINE inline __attribute__((always_inline))
#endif
#if defined(__cplusplus)
# define RTC_OPTIONAL_ARGUMENT = nullptr
#else
# define RTC_OPTIONAL_ARGUMENT
#endif
/* Invalid geometry ID */
#define RTC_INVALID_GEOMETRY_ID ((unsigned int)-1)
@ -141,7 +147,9 @@ enum RTCFormat
RTC_FORMAT_FLOAT4X4_COLUMN_MAJOR = 0x9244,
/* special 12-byte format for grids */
RTC_FORMAT_GRID = 0xA001
RTC_FORMAT_GRID = 0xA001,
RTC_FORMAT_QUATERNION_DECOMPOSITION = 0xB001,
};
/* Build quality levels */
@ -167,12 +175,138 @@ struct RTC_ALIGN(16) RTCLinearBounds
struct RTCBounds bounds1;
};
/* Intersection context flags */
enum RTCIntersectContextFlags
/* Feature flags for SYCL specialization constants */
enum RTCFeatureFlags
{
RTC_INTERSECT_CONTEXT_FLAG_NONE = 0,
RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT = (0 << 0), // optimize for incoherent rays
RTC_INTERSECT_CONTEXT_FLAG_COHERENT = (1 << 0) // optimize for coherent rays
RTC_FEATURE_FLAG_NONE = 0,
RTC_FEATURE_FLAG_MOTION_BLUR = 1 << 0,
RTC_FEATURE_FLAG_TRIANGLE = 1 << 1,
RTC_FEATURE_FLAG_QUAD = 1 << 2,
RTC_FEATURE_FLAG_GRID = 1 << 3,
RTC_FEATURE_FLAG_SUBDIVISION = 1 << 4,
RTC_FEATURE_FLAG_CONE_LINEAR_CURVE = 1 << 5,
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE = 1 << 6,
RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE = 1 << 7,
RTC_FEATURE_FLAG_ROUND_BEZIER_CURVE = 1 << 8,
RTC_FEATURE_FLAG_FLAT_BEZIER_CURVE = 1 << 9,
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BEZIER_CURVE = 1 << 10,
RTC_FEATURE_FLAG_ROUND_BSPLINE_CURVE = 1 << 11,
RTC_FEATURE_FLAG_FLAT_BSPLINE_CURVE = 1 << 12,
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE = 1 << 13,
RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE = 1 << 14,
RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE = 1 << 15,
RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE = 1 << 16,
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE = 1 << 17,
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE = 1 << 18,
RTC_FEATURE_FLAG_NORMAL_ORIENTED_CATMULL_ROM_CURVE = 1 << 19,
RTC_FEATURE_FLAG_SPHERE_POINT = 1 << 20,
RTC_FEATURE_FLAG_DISC_POINT = 1 << 21,
RTC_FEATURE_FLAG_ORIENTED_DISC_POINT = 1 << 22,
RTC_FEATURE_FLAG_POINT =
RTC_FEATURE_FLAG_SPHERE_POINT |
RTC_FEATURE_FLAG_DISC_POINT |
RTC_FEATURE_FLAG_ORIENTED_DISC_POINT,
RTC_FEATURE_FLAG_ROUND_CURVES =
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE |
RTC_FEATURE_FLAG_ROUND_BEZIER_CURVE |
RTC_FEATURE_FLAG_ROUND_BSPLINE_CURVE |
RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE |
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE,
RTC_FEATURE_FLAG_FLAT_CURVES =
RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE |
RTC_FEATURE_FLAG_FLAT_BEZIER_CURVE |
RTC_FEATURE_FLAG_FLAT_BSPLINE_CURVE |
RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE |
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE,
RTC_FEATURE_FLAG_NORMAL_ORIENTED_CURVES =
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BEZIER_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_CATMULL_ROM_CURVE,
RTC_FEATURE_FLAG_LINEAR_CURVES =
RTC_FEATURE_FLAG_CONE_LINEAR_CURVE |
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE |
RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE,
RTC_FEATURE_FLAG_BEZIER_CURVES =
RTC_FEATURE_FLAG_ROUND_BEZIER_CURVE |
RTC_FEATURE_FLAG_FLAT_BEZIER_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BEZIER_CURVE,
RTC_FEATURE_FLAG_BSPLINE_CURVES =
RTC_FEATURE_FLAG_ROUND_BSPLINE_CURVE |
RTC_FEATURE_FLAG_FLAT_BSPLINE_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE,
RTC_FEATURE_FLAG_HERMITE_CURVES =
RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE |
RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE,
RTC_FEATURE_FLAG_CURVES =
RTC_FEATURE_FLAG_CONE_LINEAR_CURVE |
RTC_FEATURE_FLAG_ROUND_LINEAR_CURVE |
RTC_FEATURE_FLAG_FLAT_LINEAR_CURVE |
RTC_FEATURE_FLAG_ROUND_BEZIER_CURVE |
RTC_FEATURE_FLAG_FLAT_BEZIER_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BEZIER_CURVE |
RTC_FEATURE_FLAG_ROUND_BSPLINE_CURVE |
RTC_FEATURE_FLAG_FLAT_BSPLINE_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_BSPLINE_CURVE |
RTC_FEATURE_FLAG_ROUND_HERMITE_CURVE |
RTC_FEATURE_FLAG_FLAT_HERMITE_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_HERMITE_CURVE |
RTC_FEATURE_FLAG_ROUND_CATMULL_ROM_CURVE |
RTC_FEATURE_FLAG_FLAT_CATMULL_ROM_CURVE |
RTC_FEATURE_FLAG_NORMAL_ORIENTED_CATMULL_ROM_CURVE,
RTC_FEATURE_FLAG_INSTANCE = 1 << 23,
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = 1 << 24,
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY = 1 << 25,
RTC_FEATURE_FLAG_FILTER_FUNCTION =
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS |
RTC_FEATURE_FLAG_FILTER_FUNCTION_IN_GEOMETRY,
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS = 1 << 26,
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY = 1 << 27,
RTC_FEATURE_FLAG_USER_GEOMETRY =
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_ARGUMENTS |
RTC_FEATURE_FLAG_USER_GEOMETRY_CALLBACK_IN_GEOMETRY,
RTC_FEATURE_FLAG_32_BIT_RAY_MASK = 1 << 28,
RTC_FEATURE_FLAG_INSTANCE_ARRAY = 1 << 29,
RTC_FEATURE_FLAG_ALL = 0xffffffff,
};
/* Ray query flags */
enum RTCRayQueryFlags
{
/* matching intel_ray_flags_t layout */
RTC_RAY_QUERY_FLAG_NONE = 0,
RTC_RAY_QUERY_FLAG_INVOKE_ARGUMENT_FILTER = (1 << 1), // enable argument filter for each geometry
/* embree specific flags */
RTC_RAY_QUERY_FLAG_INCOHERENT = (0 << 16), // optimize for incoherent rays
RTC_RAY_QUERY_FLAG_COHERENT = (1 << 16), // optimize for coherent rays
};
/* Arguments for RTCFilterFunctionN */
@ -180,7 +314,7 @@ struct RTCFilterFunctionNArguments
{
int* valid;
void* geometryUserPtr;
struct RTCIntersectContext* context;
struct RTCRayQueryContext* context;
struct RTCRayN* ray;
struct RTCHitN* hit;
unsigned int N;
@ -189,38 +323,41 @@ struct RTCFilterFunctionNArguments
/* Filter callback function */
typedef void (*RTCFilterFunctionN)(const struct RTCFilterFunctionNArguments* args);
/* Intersection context passed to intersect/occluded calls */
struct RTCIntersectContext
{
enum RTCIntersectContextFlags flags; // intersection flags
RTCFilterFunctionN filter; // filter function to execute
/* Intersection callback function */
struct RTCIntersectFunctionNArguments;
typedef void (*RTCIntersectFunctionN)(const struct RTCIntersectFunctionNArguments* args);
/* Occlusion callback function */
struct RTCOccludedFunctionNArguments;
typedef void (*RTCOccludedFunctionN)(const struct RTCOccludedFunctionNArguments* args);
/* Ray query context passed to intersect/occluded calls */
struct RTCRayQueryContext
{
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
unsigned int instStackSize; // Number of instances currently on the stack.
#endif
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // The current stack of instance ids.
#if RTC_MIN_WIDTH
float minWidthDistanceFactor; // curve radius is set to this factor times distance to ray origin
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // The current stack of instance primitive ids.
#endif
};
/* Initializes an intersection context. */
RTC_FORCEINLINE void rtcInitIntersectContext(struct RTCIntersectContext* context)
/* Initializes an ray query context. */
RTC_FORCEINLINE void rtcInitRayQueryContext(struct RTCRayQueryContext* context)
{
unsigned l = 0;
context->flags = RTC_INTERSECT_CONTEXT_FLAG_INCOHERENT;
context->filter = NULL;
#if RTC_MAX_INSTANCE_LEVEL_COUNT > 1
context->instStackSize = 0;
#endif
for (; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l)
context->instID[l] = RTC_INVALID_GEOMETRY_ID;
#if RTC_MIN_WIDTH
context->minWidthDistanceFactor = 0.0f;
for (; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
context->instID[l] = RTC_INVALID_GEOMETRY_ID;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
context->instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
#endif
}
}
/* Point query structure for closest point query */
@ -278,15 +415,28 @@ struct RTC_ALIGN(16) RTCPointQueryContext
// instance ids.
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT];
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
// instance prim ids.
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT];
#endif
// number of instances currently on the stack.
unsigned int instStackSize;
};
/* Initializes an intersection context. */
/* Initializes an ray query context. */
RTC_FORCEINLINE void rtcInitPointQueryContext(struct RTCPointQueryContext* context)
{
unsigned l = 0;
context->instStackSize = 0;
context->instID[0] = RTC_INVALID_GEOMETRY_ID;
for (; l < RTC_MAX_INSTANCE_LEVEL_COUNT; ++l) {
context->instID[l] = RTC_INVALID_GEOMETRY_ID;
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
context->instPrimID[l] = RTC_INVALID_GEOMETRY_ID;
#endif
}
}
struct RTC_ALIGN(16) RTCPointQueryFunctionArguments
@ -308,7 +458,7 @@ struct RTC_ALIGN(16) RTCPointQueryFunctionArguments
struct RTCPointQueryContext* context;
// If the current instance transform M (= context->world2inst[context->instStackSize])
// is a similarity matrix, i.e there is a constant factor similarityScale such that,
// is a similarity matrix, i.e there is a constant factor similarityScale such that
// for all x,y: dist(Mx, My) = similarityScale * dist(x, y),
// The similarity scale is 0, if the current instance transform is not a
// similarity transform and vice versa. The similarity scale allows to compute
@ -323,4 +473,30 @@ struct RTC_ALIGN(16) RTCPointQueryFunctionArguments
typedef bool (*RTCPointQueryFunction)(struct RTCPointQueryFunctionArguments* args);
#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)
/* returns function pointer to be usable in SYCL kernel */
template<auto F>
inline decltype(F) rtcGetSYCLDeviceFunctionPointer(sycl::queue& queue)
{
sycl::buffer<cl_ulong> fptr_buf(1);
{
auto fptr_acc = fptr_buf.get_host_access();
fptr_acc[0] = 0;
}
queue.submit([&](sycl::handler& cgh) {
auto fptr_acc = fptr_buf.get_access<sycl::access::mode::discard_write>(cgh);
cgh.single_task([=]() {
fptr_acc[0] = reinterpret_cast<cl_ulong>(F);
});
});
queue.wait_and_throw();
auto fptr_acc = fptr_buf.get_host_access();
return (decltype(F)) fptr_acc[0];
}
#endif
RTC_NAMESPACE_END

View File

@ -3,21 +3,32 @@
#pragma once
#define RTC_VERSION_MAJOR 3
#define RTC_VERSION_MINOR 13
#define RTC_VERSION_PATCH 5
#define RTC_VERSION 31305
#define RTC_VERSION_STRING "3.13.5"
#if !defined(EMBREE_SYCL_SUPPORT)
// #cmakedefine EMBREE_SYCL_SUPPORT
#endif
#define RTC_VERSION_MAJOR 4
#define RTC_VERSION_MINOR 3
#define RTC_VERSION_PATCH 1
#define RTC_VERSION 40301
#define RTC_VERSION_STRING "4.3.1"
#define RTC_MAX_INSTANCE_LEVEL_COUNT 1
// #cmakedefine EMBREE_GEOMETRY_INSTANCE_ARRAY
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
#define RTC_GEOMETRY_INSTANCE_ARRAY
#endif
// #cmakedefine01 EMBREE_SYCL_GEOMETRY_CALLBACK
#define EMBREE_MIN_WIDTH 0
#define RTC_MIN_WIDTH EMBREE_MIN_WIDTH
#if !defined(EMBREE_STATIC_LIB)
# define EMBREE_STATIC_LIB
#define EMBREE_STATIC_LIB
#endif
/* #undef EMBREE_API_NAMESPACE*/
// #cmakedefine EMBREE_API_NAMESPACE
#if defined(EMBREE_API_NAMESPACE)
# define RTC_NAMESPACE
@ -56,3 +67,14 @@
#else
# define RTC_API RTC_API_IMPORT
#endif
#if defined(ISPC)
# define RTC_SYCL_INDIRECTLY_CALLABLE
#elif defined(__SYCL_DEVICE_ONLY__)
# define RTC_SYCL_INDIRECTLY_CALLABLE [[intel::device_indirectly_callable]] SYCL_EXTERNAL
# define RTC_SYCL_API SYCL_EXTERNAL
#else
# define RTC_SYCL_INDIRECTLY_CALLABLE
# define RTC_SYCL_API RTC_API
#endif

View File

@ -13,6 +13,24 @@ typedef struct RTCDeviceTy* RTCDevice;
/* Creates a new Embree device. */
RTC_API RTCDevice rtcNewDevice(const char* config);
#if defined(EMBREE_SYCL_SUPPORT) && defined(SYCL_LANGUAGE_VERSION)
/* Creates a new Embree SYCL device. */
RTC_API_EXTERN_C RTCDevice rtcNewSYCLDevice(sycl::context context, const char* config);
/* Checks if SYCL device is supported by Embree. */
RTC_API bool rtcIsSYCLDeviceSupported(const sycl::device sycl_device);
/* SYCL selector for Embree supported devices */
RTC_API int rtcSYCLDeviceSelector(const sycl::device sycl_device);
/* Set the SYCL device to be used to allocate data */
RTC_API void rtcSetDeviceSYCLDevice(RTCDevice device, const sycl::device sycl_device);
#endif
/* Retains the Embree device (increments the reference count). */
RTC_API void rtcRetainDevice(RTCDevice device);
@ -30,8 +48,8 @@ enum RTCDeviceProperty
RTC_DEVICE_PROPERTY_NATIVE_RAY4_SUPPORTED = 32,
RTC_DEVICE_PROPERTY_NATIVE_RAY8_SUPPORTED = 33,
RTC_DEVICE_PROPERTY_NATIVE_RAY16_SUPPORTED = 34,
RTC_DEVICE_PROPERTY_RAY_STREAM_SUPPORTED = 35,
RTC_DEVICE_PROPERTY_BACKFACE_CULLING_SPHERES_ENABLED = 62,
RTC_DEVICE_PROPERTY_BACKFACE_CULLING_CURVES_ENABLED = 63,
RTC_DEVICE_PROPERTY_RAY_MASK_SUPPORTED = 64,
RTC_DEVICE_PROPERTY_BACKFACE_CULLING_ENABLED = 65,
@ -66,7 +84,7 @@ enum RTCError
RTC_ERROR_INVALID_OPERATION = 3,
RTC_ERROR_OUT_OF_MEMORY = 4,
RTC_ERROR_UNSUPPORTED_CPU = 5,
RTC_ERROR_CANCELLED = 6
RTC_ERROR_CANCELLED = 6,
};
/* Returns the error code. */

View File

@ -48,7 +48,8 @@ enum RTCGeometryType
RTC_GEOMETRY_TYPE_NORMAL_ORIENTED_CATMULL_ROM_CURVE = 60, // flat normal-oriented Catmull-Rom curves
RTC_GEOMETRY_TYPE_USER = 120, // user-defined geometry
RTC_GEOMETRY_TYPE_INSTANCE = 121 // scene instance
RTC_GEOMETRY_TYPE_INSTANCE = 121, // scene instance
RTC_GEOMETRY_TYPE_INSTANCE_ARRAY = 122, // scene instance array
};
/* Interpolation modes for subdivision surfaces */
@ -86,30 +87,24 @@ struct RTCIntersectFunctionNArguments
int* valid;
void* geometryUserPtr;
unsigned int primID;
struct RTCIntersectContext* context;
struct RTCRayQueryContext* context;
struct RTCRayHitN* rayhit;
unsigned int N;
unsigned int geomID;
};
/* Intersection callback function */
typedef void (*RTCIntersectFunctionN)(const struct RTCIntersectFunctionNArguments* args);
/* Arguments for RTCOccludedFunctionN */
struct RTCOccludedFunctionNArguments
{
int* valid;
void* geometryUserPtr;
unsigned int primID;
struct RTCIntersectContext* context;
struct RTCRayQueryContext* context;
struct RTCRayN* ray;
unsigned int N;
unsigned int geomID;
};
/* Occlusion callback function */
typedef void (*RTCOccludedFunctionN)(const struct RTCOccludedFunctionNArguments* args);
/* Arguments for RTCDisplacementFunctionN */
struct RTCDisplacementFunctionNArguments
{
@ -192,6 +187,9 @@ RTC_API void rtcSetGeometryIntersectFilterFunction(RTCGeometry geometry, RTCFilt
/* Sets the occlusion filter callback function of the geometry. */
RTC_API void rtcSetGeometryOccludedFilterFunction(RTCGeometry geometry, RTCFilterFunctionN filter);
/* Enables argument version of intersection or occlusion filter function. */
RTC_API void rtcSetGeometryEnableFilterFunctionFromArguments(RTCGeometry geometry, bool enable);
/* Sets the user-defined data pointer of the geometry. */
RTC_API void rtcSetGeometryUserData(RTCGeometry geometry, void* ptr);
@ -214,15 +212,17 @@ RTC_API void rtcSetGeometryIntersectFunction(RTCGeometry geometry, RTCIntersectF
RTC_API void rtcSetGeometryOccludedFunction(RTCGeometry geometry, RTCOccludedFunctionN occluded);
/* Invokes the intersection filter from the intersection callback function. */
RTC_API void rtcFilterIntersection(const struct RTCIntersectFunctionNArguments* args, const struct RTCFilterFunctionNArguments* filterArgs);
RTC_SYCL_API void rtcInvokeIntersectFilterFromGeometry(const struct RTCIntersectFunctionNArguments* args, const struct RTCFilterFunctionNArguments* filterArgs);
/* Invokes the occlusion filter from the occlusion callback function. */
RTC_API void rtcFilterOcclusion(const struct RTCOccludedFunctionNArguments* args, const struct RTCFilterFunctionNArguments* filterArgs);
RTC_SYCL_API void rtcInvokeOccludedFilterFromGeometry(const struct RTCOccludedFunctionNArguments* args, const struct RTCFilterFunctionNArguments* filterArgs);
/* Sets the instanced scene of an instance geometry. */
RTC_API void rtcSetGeometryInstancedScene(RTCGeometry geometry, RTCScene scene);
/* Sets the instanced scenes of an instance array geometry. */
RTC_API void rtcSetGeometryInstancedScenes(RTCGeometry geometry, RTCScene* scenes, size_t numScenes);
/* Sets the transformation of an instance for the specified time step. */
RTC_API void rtcSetGeometryTransform(RTCGeometry geometry, unsigned int timeStep, enum RTCFormat format, const void* xfm);
@ -232,6 +232,12 @@ RTC_API void rtcSetGeometryTransformQuaternion(RTCGeometry geometry, unsigned in
/* Returns the interpolated transformation of an instance for the specified time. */
RTC_API void rtcGetGeometryTransform(RTCGeometry geometry, float time, enum RTCFormat format, void* xfm);
/*
* Returns the interpolated transformation of the instPrimID'th instance of an
* instance array for the specified time. If geometry is an regular instance,
* instPrimID must be 0.
*/
RTC_API void rtcGetGeometryTransformEx(RTCGeometry geometry, unsigned int instPrimID, float time, enum RTCFormat format, void* xfm);
/* Sets the uniform tessellation rate of the geometry. */
RTC_API void rtcSetGeometryTessellationRate(RTCGeometry geometry, float tessellationRate);

View File

@ -39,6 +39,9 @@ struct RTC_ALIGN(16) RTCHit
unsigned int primID; // primitive ID
unsigned int geomID; // geometry ID
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance ID
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT]; // instance primitive ID
#endif
};
/* Combined ray/hit structure for a single ray */
@ -80,6 +83,9 @@ struct RTC_ALIGN(16) RTCHit4
unsigned int primID[4];
unsigned int geomID[4];
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT][4];
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT][4];
#endif
};
/* Combined ray/hit structure for a packet of 4 rays */
@ -121,6 +127,9 @@ struct RTC_ALIGN(32) RTCHit8
unsigned int primID[8];
unsigned int geomID[8];
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT][8];
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT][8];
#endif
};
/* Combined ray/hit structure for a packet of 8 rays */
@ -162,6 +171,9 @@ struct RTC_ALIGN(64) RTCHit16
unsigned int primID[16];
unsigned int geomID[16];
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT][16];
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT][16];
#endif
};
/* Combined ray/hit structure for a packet of 16 rays */
@ -171,47 +183,6 @@ struct RTCRayHit16
struct RTCHit16 hit;
};
/* Ray structure for a packet/stream of N rays in pointer SOA layout */
struct RTCRayNp
{
float* org_x;
float* org_y;
float* org_z;
float* tnear;
float* dir_x;
float* dir_y;
float* dir_z;
float* time;
float* tfar;
unsigned int* mask;
unsigned int* id;
unsigned int* flags;
};
/* Hit structure for a packet/stream of N rays in pointer SOA layout */
struct RTCHitNp
{
float* Ng_x;
float* Ng_y;
float* Ng_z;
float* u;
float* v;
unsigned int* primID;
unsigned int* geomID;
unsigned int* instID[RTC_MAX_INSTANCE_LEVEL_COUNT];
};
/* Combined ray/hit structure for a packet/stream of N rays in pointer SOA layout */
struct RTCRayHitNp
{
struct RTCRayNp ray;
struct RTCHitNp hit;
};
struct RTCRayN;
struct RTCHitN;
struct RTCRayHitN;
@ -242,9 +213,12 @@ RTC_FORCEINLINE float& RTCHitN_Ng_z(RTCHitN* hit, unsigned int N, unsigned int i
RTC_FORCEINLINE float& RTCHitN_u(RTCHitN* hit, unsigned int N, unsigned int i) { return ((float*)hit)[3*N+i]; }
RTC_FORCEINLINE float& RTCHitN_v(RTCHitN* hit, unsigned int N, unsigned int i) { return ((float*)hit)[4*N+i]; }
RTC_FORCEINLINE unsigned int& RTCHitN_primID(RTCHitN* hit, unsigned int N, unsigned int i) { return ((unsigned*)hit)[5*N+i]; }
RTC_FORCEINLINE unsigned int& RTCHitN_geomID(RTCHitN* hit, unsigned int N, unsigned int i) { return ((unsigned*)hit)[6*N+i]; }
RTC_FORCEINLINE unsigned int& RTCHitN_instID(RTCHitN* hit, unsigned int N, unsigned int i, unsigned int l) { return ((unsigned*)hit)[7*N+i+N*l]; }
RTC_FORCEINLINE unsigned int& RTCHitN_primID (RTCHitN* hit, unsigned int N, unsigned int i) { return ((unsigned*)hit)[5*N+i]; }
RTC_FORCEINLINE unsigned int& RTCHitN_geomID (RTCHitN* hit, unsigned int N, unsigned int i) { return ((unsigned*)hit)[6*N+i]; }
RTC_FORCEINLINE unsigned int& RTCHitN_instID (RTCHitN* hit, unsigned int N, unsigned int i, unsigned int l) { return ((unsigned*)hit)[7*N + N*l + i]; }
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
RTC_FORCEINLINE unsigned int& RTCHitN_instPrimID(RTCHitN* hit, unsigned int N, unsigned int i, unsigned int l) { return ((unsigned*)hit)[7*N + N*RTC_MAX_INSTANCE_LEVEL_COUNT + N*l + i]; }
#endif
/* Helper functions to extract RTCRayN and RTCHitN from RTCRayHitN */
RTC_FORCEINLINE RTCRayN* RTCRayHitN_RayN(RTCRayHitN* rayhit, unsigned int N) { return (RTCRayN*)&((float*)rayhit)[0*N]; }
@ -284,6 +258,9 @@ struct RTCHitNt
unsigned int primID[N];
unsigned int geomID[N];
unsigned int instID[RTC_MAX_INSTANCE_LEVEL_COUNT][N];
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
unsigned int instPrimID[RTC_MAX_INSTANCE_LEVEL_COUNT][N];
#endif
};
/* Helper structure for a combined ray/hit packet of compile-time size N */
@ -322,8 +299,12 @@ RTC_FORCEINLINE RTCHit rtcGetHitFromHitN(RTCHitN* hitN, unsigned int N, unsigned
hit.v = RTCHitN_v(hitN,N,i);
hit.primID = RTCHitN_primID(hitN,N,i);
hit.geomID = RTCHitN_geomID(hitN,N,i);
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++)
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
hit.instID[l] = RTCHitN_instID(hitN,N,i,l);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
hit.instPrimID[l] = RTCHitN_instPrimID(hitN,N,i,l);
#endif
}
return hit;
}
@ -336,8 +317,12 @@ RTC_FORCEINLINE void rtcCopyHitToHitN(RTCHitN* hitN, const RTCHit* hit, unsigned
RTCHitN_v(hitN,N,i) = hit->v;
RTCHitN_primID(hitN,N,i) = hit->primID;
RTCHitN_geomID(hitN,N,i) = hit->geomID;
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++)
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
RTCHitN_instID(hitN,N,i,l) = hit->instID[l];
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
RTCHitN_instPrimID(hitN,N,i,l) = hit->instPrimID[l];
#endif
}
}
RTC_FORCEINLINE RTCRayHit rtcGetRayHitFromRayHitN(RTCRayHitN* rayhitN, unsigned int N, unsigned int i)
@ -366,8 +351,12 @@ RTC_FORCEINLINE RTCRayHit rtcGetRayHitFromRayHitN(RTCRayHitN* rayhitN, unsigned
rh.hit.v = RTCHitN_v(hit,N,i);
rh.hit.primID = RTCHitN_primID(hit,N,i);
rh.hit.geomID = RTCHitN_geomID(hit,N,i);
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++)
for (unsigned int l = 0; l < RTC_MAX_INSTANCE_LEVEL_COUNT; l++) {
rh.hit.instID[l] = RTCHitN_instID(hit,N,i,l);
#if defined(RTC_GEOMETRY_INSTANCE_ARRAY)
rh.hit.instPrimID[l] = RTCHitN_instPrimID(hit,N,i,l);
#endif
}
return rh;
}

View File

@ -0,0 +1,252 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "rtcore_device.h"
RTC_NAMESPACE_BEGIN
/* Forward declarations for ray structures */
struct RTCRayHit;
struct RTCRayHit4;
struct RTCRayHit8;
struct RTCRayHit16;
/* Scene flags */
enum RTCSceneFlags
{
RTC_SCENE_FLAG_NONE = 0,
RTC_SCENE_FLAG_DYNAMIC = (1 << 0),
RTC_SCENE_FLAG_COMPACT = (1 << 1),
RTC_SCENE_FLAG_ROBUST = (1 << 2),
RTC_SCENE_FLAG_FILTER_FUNCTION_IN_ARGUMENTS = (1 << 3)
};
/* Additional arguments for rtcIntersect1/4/8/16 calls */
struct RTCIntersectArguments
{
enum RTCRayQueryFlags flags; // intersection flags
enum RTCFeatureFlags feature_mask; // selectively enable features for traversal
struct RTCRayQueryContext* context; // optional pointer to ray query context
RTCFilterFunctionN filter; // filter function to execute
RTCIntersectFunctionN intersect; // user geometry intersection callback to execute
#if RTC_MIN_WIDTH
float minWidthDistanceFactor; // curve radius is set to this factor times distance to ray origin
#endif
};
/* Initializes intersection arguments. */
RTC_FORCEINLINE void rtcInitIntersectArguments(struct RTCIntersectArguments* args)
{
args->flags = RTC_RAY_QUERY_FLAG_INCOHERENT;
args->feature_mask = RTC_FEATURE_FLAG_ALL;
args->context = NULL;
args->filter = NULL;
args->intersect = NULL;
#if RTC_MIN_WIDTH
args->minWidthDistanceFactor = 0.0f;
#endif
}
/* Additional arguments for rtcOccluded1/4/8/16 calls */
struct RTCOccludedArguments
{
enum RTCRayQueryFlags flags; // intersection flags
enum RTCFeatureFlags feature_mask; // selectively enable features for traversal
struct RTCRayQueryContext* context; // optional pointer to ray query context
RTCFilterFunctionN filter; // filter function to execute
RTCOccludedFunctionN occluded; // user geometry occlusion callback to execute
#if RTC_MIN_WIDTH
float minWidthDistanceFactor; // curve radius is set to this factor times distance to ray origin
#endif
};
/* Initializes an intersection arguments. */
RTC_FORCEINLINE void rtcInitOccludedArguments(struct RTCOccludedArguments* args)
{
args->flags = RTC_RAY_QUERY_FLAG_INCOHERENT;
args->feature_mask = RTC_FEATURE_FLAG_ALL;
args->context = NULL;
args->filter = NULL;
args->occluded = NULL;
#if RTC_MIN_WIDTH
args->minWidthDistanceFactor = 0.0f;
#endif
}
/* Creates a new scene. */
RTC_API RTCScene rtcNewScene(RTCDevice device);
/* Returns the device the scene got created in. The reference count of
* the device is incremented by this function. */
RTC_API RTCDevice rtcGetSceneDevice(RTCScene hscene);
/* Retains the scene (increments the reference count). */
RTC_API void rtcRetainScene(RTCScene scene);
/* Releases the scene (decrements the reference count). */
RTC_API void rtcReleaseScene(RTCScene scene);
/* Attaches the geometry to a scene. */
RTC_API unsigned int rtcAttachGeometry(RTCScene scene, RTCGeometry geometry);
/* Attaches the geometry to a scene using the specified geometry ID. */
RTC_API void rtcAttachGeometryByID(RTCScene scene, RTCGeometry geometry, unsigned int geomID);
/* Detaches the geometry from the scene. */
RTC_API void rtcDetachGeometry(RTCScene scene, unsigned int geomID);
/* Gets a geometry handle from the scene. This function is not thread safe and should get used during rendering. */
RTC_API RTCGeometry rtcGetGeometry(RTCScene scene, unsigned int geomID);
/* Gets a geometry handle from the scene. This function is thread safe and should NOT get used during rendering. */
RTC_API RTCGeometry rtcGetGeometryThreadSafe(RTCScene scene, unsigned int geomID);
/* Gets the user-defined data pointer of the geometry. This function is not thread safe and should get used during rendering. */
RTC_SYCL_API void* rtcGetGeometryUserDataFromScene(RTCScene scene, unsigned int geomID);
/* Returns the interpolated transformation of an instance for the specified time. */
RTC_SYCL_API void rtcGetGeometryTransformFromScene(RTCScene scene, unsigned int geomID, float time, enum RTCFormat format, void* xfm);
/* Commits the scene. */
RTC_API void rtcCommitScene(RTCScene scene);
/* Commits the scene from multiple threads. */
RTC_API void rtcJoinCommitScene(RTCScene scene);
/* Progress monitor callback function */
typedef bool (*RTCProgressMonitorFunction)(void* ptr, double n);
/* Sets the progress monitor callback function of the scene. */
RTC_API void rtcSetSceneProgressMonitorFunction(RTCScene scene, RTCProgressMonitorFunction progress, void* ptr);
/* Sets the build quality of the scene. */
RTC_API void rtcSetSceneBuildQuality(RTCScene scene, enum RTCBuildQuality quality);
/* Sets the scene flags. */
RTC_API void rtcSetSceneFlags(RTCScene scene, enum RTCSceneFlags flags);
/* Returns the scene flags. */
RTC_API enum RTCSceneFlags rtcGetSceneFlags(RTCScene scene);
/* Returns the axis-aligned bounds of the scene. */
RTC_API void rtcGetSceneBounds(RTCScene scene, struct RTCBounds* bounds_o);
/* Returns the linear axis-aligned bounds of the scene. */
RTC_API void rtcGetSceneLinearBounds(RTCScene scene, struct RTCLinearBounds* bounds_o);
/* Perform a closest point query of the scene. */
RTC_API bool rtcPointQuery(RTCScene scene, struct RTCPointQuery* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void* userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcPointQuery4(const int* valid, RTCScene scene, struct RTCPointQuery4* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcPointQuery8(const int* valid, RTCScene scene, struct RTCPointQuery8* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Perform a closest point query with a packet of 4 points with the scene. */
RTC_API bool rtcPointQuery16(const int* valid, RTCScene scene, struct RTCPointQuery16* query, struct RTCPointQueryContext* context, RTCPointQueryFunction queryFunc, void** userPtr);
/* Intersects a single ray with the scene. */
RTC_SYCL_API void rtcIntersect1(RTCScene scene, struct RTCRayHit* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
/* Intersects a packet of 4 rays with the scene. */
RTC_API void rtcIntersect4(const int* valid, RTCScene scene, struct RTCRayHit4* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
/* Intersects a packet of 8 rays with the scene. */
RTC_API void rtcIntersect8(const int* valid, RTCScene scene, struct RTCRayHit8* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
/* Intersects a packet of 16 rays with the scene. */
RTC_API void rtcIntersect16(const int* valid, RTCScene scene, struct RTCRayHit16* rayhit, struct RTCIntersectArguments* args RTC_OPTIONAL_ARGUMENT);
/* Forwards ray inside user geometry callback. */
RTC_SYCL_API void rtcForwardIntersect1(const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay* ray, unsigned int instID);
/* Forwards ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_SYCL_API void rtcForwardIntersect1Ex(const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID);
/* Forwards ray packet of size 4 inside user geometry callback. */
RTC_API void rtcForwardIntersect4(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay4* ray, unsigned int instID);
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcForwardIntersect4Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay4* ray, unsigned int instID, unsigned int primInstID);
/* Forwards ray packet of size 8 inside user geometry callback. */
RTC_API void rtcForwardIntersect8(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay8* ray, unsigned int instID);
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcForwardIntersect8Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay8* ray, unsigned int instID, unsigned int primInstID);
/* Forwards ray packet of size 16 inside user geometry callback. */
RTC_API void rtcForwardIntersect16(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID);
/* Forwards ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcForwardIntersect16Ex(const int* valid, const struct RTCIntersectFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID, unsigned int primInstID);
/* Tests a single ray for occlusion with the scene. */
RTC_SYCL_API void rtcOccluded1(RTCScene scene, struct RTCRay* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
/* Tests a packet of 4 rays for occlusion occluded with the scene. */
RTC_API void rtcOccluded4(const int* valid, RTCScene scene, struct RTCRay4* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
/* Tests a packet of 8 rays for occlusion with the scene. */
RTC_API void rtcOccluded8(const int* valid, RTCScene scene, struct RTCRay8* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
/* Tests a packet of 16 rays for occlusion with the scene. */
RTC_API void rtcOccluded16(const int* valid, RTCScene scene, struct RTCRay16* ray, struct RTCOccludedArguments* args RTC_OPTIONAL_ARGUMENT);
/* Forwards single occlusion ray inside user geometry callback. */
RTC_SYCL_API void rtcForwardOccluded1(const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay* ray, unsigned int instID);
/* Forwards single occlusion ray inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_SYCL_API void rtcForwardOccluded1Ex(const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay* ray, unsigned int instID, unsigned int instPrimID);
/* Forwards occlusion ray packet of size 4 inside user geometry callback. */
RTC_API void rtcForwardOccluded4(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay4* ray, unsigned int instID);
/* Forwards occlusion ray packet of size 4 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcForwardOccluded4Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay4* ray, unsigned int instID, unsigned int instPrimID);
/* Forwards occlusion ray packet of size 8 inside user geometry callback. */
RTC_API void rtcForwardOccluded8(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay8* ray, unsigned int instID);
/* Forwards occlusion ray packet of size 8 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcForwardOccluded8Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay8* ray, unsigned int instID, unsigned int instPrimID);
/* Forwards occlusion ray packet of size 16 inside user geometry callback. */
RTC_API void rtcForwardOccluded16(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID);
/* Forwards occlusion ray packet of size 16 inside user geometry callback. Extended to handle instance arrays using instPrimID parameter. */
RTC_API void rtcForwardOccluded16Ex(const int* valid, const struct RTCOccludedFunctionNArguments* args, RTCScene scene, struct RTCRay16* ray, unsigned int instID, unsigned int instPrimID);
/*! collision callback */
struct RTCCollision { unsigned int geomID0; unsigned int primID0; unsigned int geomID1; unsigned int primID1; };
typedef void (*RTCCollideFunc) (void* userPtr, struct RTCCollision* collisions, unsigned int num_collisions);
/*! Performs collision detection of two scenes */
RTC_API void rtcCollide (RTCScene scene0, RTCScene scene1, RTCCollideFunc callback, void* userPtr);
#if defined(__cplusplus)
/* Helper for easily combining scene flags */
inline RTCSceneFlags operator|(RTCSceneFlags a, RTCSceneFlags b) {
return (RTCSceneFlags)((size_t)a | (size_t)b);
}
#endif
RTC_NAMESPACE_END

View File

@ -5,6 +5,7 @@
#include "../common/builder.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_sort.h"
namespace embree
{
@ -101,7 +102,7 @@ namespace embree
}
};
#if defined (__AVX2__)
#if defined (__AVX2__) || defined(__SYCL_DEVICE_ONLY__)
/*! for AVX2 there is a fast scalar bitInterleave */
struct MortonCodeGenerator

View File

@ -7,7 +7,7 @@
#define MBLUR_NUM_OBJECT_BINS 32
#include "../bvh/bvh.h"
#include "../common/primref_mb.h"
#include "../builders/primref_mb.h"
#include "heuristic_binning_array_aligned.h"
#include "heuristic_timesplit_array.h"
@ -141,16 +141,17 @@ namespace embree
struct VirtualRecalculatePrimRef
{
Scene* scene;
const SubGridBuildData * const sgrids;
__forceinline VirtualRecalculatePrimRef (Scene* scene)
: scene(scene) {}
__forceinline VirtualRecalculatePrimRef (Scene* scene, const SubGridBuildData * const sgrids = nullptr)
: scene(scene), sgrids(sgrids) {}
__forceinline PrimRefMB operator() (const PrimRefMB& prim, const BBox1f time_range) const
{
const unsigned geomID = prim.geomID();
const unsigned primID = prim.primID();
const Geometry* mesh = scene->get(geomID);
const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range);
const LBBox3fa lbounds = mesh->vlinearBounds(primID, time_range, sgrids);
const range<int> tbounds = mesh->timeSegmentRange(time_range);
return PrimRefMB (lbounds, tbounds.size(), mesh->time_range, mesh->numTimeSegments(), geomID, primID);
}
@ -166,7 +167,7 @@ namespace embree
}
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range) const {
return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range);
return scene->get(prim.geomID())->vlinearBounds(prim.primID(), time_range, sgrids);
}
__forceinline LBBox3fa linearBounds(const PrimRefMB& prim, const BBox1f time_range, const LinearSpace3fa& space) const {

View File

@ -7,13 +7,8 @@
#include "heuristic_spatial_array.h"
#include "heuristic_openmerge_array.h"
#if defined(__AVX512F__) && !defined(__AVX512VL__) // KNL
# define NUM_OBJECT_BINS 16
# define NUM_SPATIAL_BINS 16
#else
# define NUM_OBJECT_BINS 32
# define NUM_SPATIAL_BINS 16
#endif
#define NUM_OBJECT_BINS 32
#define NUM_SPATIAL_BINS 16
namespace embree
{

View File

@ -4,6 +4,7 @@
#pragma once
#include "priminfo.h"
#include "priminfo_mb.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_partition.h"
@ -391,6 +392,63 @@ namespace embree
return Split(bestSAH,bestDim,bestPos,mapping);
}
/*! finds the best split by scanning binning information */
__forceinline Split best_block_size(const BinMapping<BINS>& mapping, const size_t blockSize) const
{
/* sweep from right to left and compute parallel prefix of merged bounds */
vfloat4 rAreas[BINS];
vuint4 rCounts[BINS];
vuint4 count = 0; BBox bx = empty; BBox by = empty; BBox bz = empty;
for (size_t i=mapping.size()-1; i>0; i--)
{
count += counts(i);
rCounts[i] = count;
bx.extend(bounds(i,0)); rAreas[i][0] = expectedApproxHalfArea(bx);
by.extend(bounds(i,1)); rAreas[i][1] = expectedApproxHalfArea(by);
bz.extend(bounds(i,2)); rAreas[i][2] = expectedApproxHalfArea(bz);
rAreas[i][3] = 0.0f;
}
/* sweep from left to right and compute SAH */
vuint4 blocks_add = blockSize-1;
vfloat4 blocks_factor = 1.0f/float(blockSize);
vuint4 ii = 1; vfloat4 vbestSAH = pos_inf; vuint4 vbestPos = 0;
count = 0; bx = empty; by = empty; bz = empty;
for (size_t i=1; i<mapping.size(); i++, ii+=1)
{
count += counts(i-1);
bx.extend(bounds(i-1,0)); float Ax = expectedApproxHalfArea(bx);
by.extend(bounds(i-1,1)); float Ay = expectedApproxHalfArea(by);
bz.extend(bounds(i-1,2)); float Az = expectedApproxHalfArea(bz);
const vfloat4 lArea = vfloat4(Ax,Ay,Az,Az);
const vfloat4 rArea = rAreas[i];
const vfloat4 lCount = floor(vfloat4(count +blocks_add)*blocks_factor);
const vfloat4 rCount = floor(vfloat4(rCounts[i]+blocks_add)*blocks_factor);
const vfloat4 sah = madd(lArea,lCount,rArea*rCount);
vbestPos = select(sah < vbestSAH,ii ,vbestPos);
vbestSAH = select(sah < vbestSAH,sah,vbestSAH);
}
/* find best dimension */
float bestSAH = inf;
int bestDim = -1;
int bestPos = 0;
for (int dim=0; dim<3; dim++)
{
/* ignore zero sized dimensions */
if (unlikely(mapping.invalid(dim)))
continue;
/* test if this is a better dimension */
if (vbestSAH[dim] < bestSAH && vbestPos[dim] != 0) {
bestDim = dim;
bestPos = vbestPos[dim];
bestSAH = vbestSAH[dim];
}
}
return Split(bestSAH,bestDim,bestPos,mapping);
}
/*! calculates extended split information */
__forceinline void getSplitInfo(const BinMapping<BINS>& mapping, const Split& split, SplitInfoT<BBox>& info) const
{

View File

@ -23,6 +23,9 @@ namespace embree
__forceinline PrimInfoRange (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
: CentGeomBBox3fa(centGeomBounds), range<size_t>(begin,end) {}
__forceinline PrimInfoRange (range<size_t> r, const CentGeomBBox3fa& centGeomBounds)
: CentGeomBBox3fa(centGeomBounds), range<size_t>(r) {}
__forceinline float leafSAH() const {
return expectedApproxHalfArea(geomBounds)*float(size());
}
@ -30,8 +33,46 @@ namespace embree
__forceinline float leafSAH(size_t block_shift) const {
return expectedApproxHalfArea(geomBounds)*float((size()+(size_t(1)<<block_shift)-1) >> block_shift);
}
__forceinline range<size_t> get_range() const {
return range<size_t>(begin(),end());
}
template<typename PrimRef>
__forceinline void add_primref(const PrimRef& prim)
{
CentGeomBBox3fa::extend_primref(prim);
_end++;
}
};
inline void performFallbackSplit(PrimRef* const prims, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
{
const size_t begin = pinfo.begin();
const size_t end = pinfo.end();
const size_t center = (begin + end)/2;
CentGeomBBox3fa left(empty);
for (size_t i=begin; i<center; i++)
left.extend_center2(prims[i]);
new (&linfo) PrimInfoRange(begin,center,left);
CentGeomBBox3fa right(empty);
for (size_t i=center; i<end; i++)
right.extend_center2(prims[i]);
new (&rinfo) PrimInfoRange(center,end,right);
}
template<typename Type, typename getTypeFunc>
inline void performTypeSplit(const getTypeFunc& getType, Type type, PrimRef* const prims, range<size_t> range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
{
CentGeomBBox3fa local_left(empty), local_right(empty);
auto isLeft = [&] (const PrimRef& ref) { return type == getType(ref.geomID()); };
const size_t center = serial_partitioning(prims,range.begin(),range.end(),local_left,local_right,isLeft,CentGeomBBox3fa::extend_ref);
linfo = PrimInfoRange(make_range(range.begin(),center ),local_left);
rinfo = PrimInfoRange(make_range(center ,range.end()),local_right);
}
/*! Performs standard object binning */
template<typename PrimRef, size_t BINS>
struct HeuristicArrayBinningSAH
@ -69,6 +110,24 @@ namespace embree
return binner.best(mapping,logBlockSize);
}
/*! finds the best split */
__noinline const Split find_block_size(const PrimInfoRange& pinfo, const size_t blockSize)
{
if (likely(pinfo.size() < PARALLEL_THRESHOLD))
return find_block_size_template<false>(pinfo,blockSize);
else
return find_block_size_template<true>(pinfo,blockSize);
}
template<bool parallel>
__forceinline const Split find_block_size_template(const PrimInfoRange& pinfo, const size_t blockSize)
{
Binner binner(empty);
const BinMapping<BINS> mapping(pinfo);
bin_serial_or_parallel<parallel>(binner,prims,pinfo.begin(),pinfo.end(),PARALLEL_FIND_BLOCK_SIZE,mapping);
return binner.best_block_size(mapping,blockSize);
}
/*! array partitioning */
__forceinline void split(const Split& split, const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
{
@ -121,21 +180,8 @@ namespace embree
std::sort(&prims[pinfo.begin()],&prims[pinfo.end()]);
}
void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo)
{
const size_t begin = pinfo.begin();
const size_t end = pinfo.end();
const size_t center = (begin + end)/2;
CentGeomBBox3fa left(empty);
for (size_t i=begin; i<center; i++)
left.extend_center2(prims[i]);
new (&linfo) PrimInfoRange(begin,center,left);
CentGeomBBox3fa right(empty);
for (size_t i=center; i<end; i++)
right.extend_center2(prims[i]);
new (&rinfo) PrimInfoRange(center,end,right);
void splitFallback(const PrimInfoRange& pinfo, PrimInfoRange& linfo, PrimInfoRange& rinfo) {
performFallbackSplit(prims,pinfo,linfo,rinfo);
}
void splitByGeometry(const range<size_t>& range, PrimInfoRange& linfo, PrimInfoRange& rinfo)
@ -156,6 +202,8 @@ namespace embree
PrimRef* const prims;
};
#if !defined(RTHWIF_STANDALONE)
/*! Performs standard object binning */
template<typename PrimRefMB, size_t BINS>
struct HeuristicArrayBinningMB
@ -196,5 +244,6 @@ namespace embree
new (&rset) SetMB(right,set.prims,range<size_t>(center,end ),set.time_range);
}
};
#endif
}
}

View File

@ -3,7 +3,6 @@
#pragma once
#include "../common/scene.h"
#include "priminfo.h"
namespace embree

View File

@ -3,7 +3,7 @@
#pragma once
#include "../common/primref_mb.h"
#include "../builders/primref_mb.h"
#include "../../common/algorithms/parallel_filter.h"
#define MBLUR_TIME_SPLIT_THRESHOLD 1.25f

View File

@ -3,9 +3,7 @@
#pragma once
#include "../common/default.h"
#include "../common/primref.h"
#include "../common/primref_mb.h"
#include "primref.h"
namespace embree
{
@ -41,6 +39,10 @@ namespace embree
centBounds.extend(center);
}
static void extend_ref (CentGeom& pinfo, const PrimRef& ref) {
pinfo.extend_primref(ref);
};
template<typename PrimRef>
__forceinline void extend_center2(const PrimRef& prim)
{
@ -84,6 +86,9 @@ namespace embree
__forceinline PrimInfoT (EmptyTy)
: CentGeom<BBox>(empty), begin(0), end(0) {}
__forceinline PrimInfoT (size_t N)
: CentGeom<BBox>(empty), begin(0), end(N) {}
__forceinline PrimInfoT (size_t begin, size_t end, const CentGeomBBox3fa& centGeomBounds)
: CentGeom<BBox>(centGeomBounds), begin(begin), end(end) {}
@ -158,205 +163,5 @@ namespace embree
typedef PrimInfoT<BBox3fa> PrimInfo;
//typedef PrimInfoT<LBBox3fa> PrimInfoMB;
/*! stores bounding information for a set of primitives */
template<typename BBox>
class PrimInfoMBT : public CentGeom<BBox>
{
public:
using CentGeom<BBox>::geomBounds;
using CentGeom<BBox>::centBounds;
__forceinline PrimInfoMBT () {
}
__forceinline PrimInfoMBT (EmptyTy)
: CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
__forceinline PrimInfoMBT (size_t begin, size_t end)
: CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
template<typename PrimRef>
__forceinline void add_primref(const PrimRef& prim)
{
CentGeom<BBox>::extend_primref(prim);
time_range.extend(prim.time_range);
object_range._end++;
num_time_segments += prim.size();
if (max_num_time_segments < prim.totalTimeSegments()) {
max_num_time_segments = prim.totalTimeSegments();
max_time_range = prim.time_range;
}
}
__forceinline void merge(const PrimInfoMBT& other)
{
CentGeom<BBox>::merge(other);
time_range.extend(other.time_range);
object_range._begin += other.object_range.begin();
object_range._end += other.object_range.end();
num_time_segments += other.num_time_segments;
if (max_num_time_segments < other.max_num_time_segments) {
max_num_time_segments = other.max_num_time_segments;
max_time_range = other.max_time_range;
}
}
static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
PrimInfoMBT r = a; r.merge(b); return r;
}
__forceinline size_t begin() const {
return object_range.begin();
}
__forceinline size_t end() const {
return object_range.end();
}
/*! returns the number of primitives */
__forceinline size_t size() const {
return object_range.size();
}
__forceinline float halfArea() const {
return time_range.size()*expectedApproxHalfArea(geomBounds);
}
__forceinline float leafSAH() const {
return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
}
__forceinline float leafSAH(size_t block_shift) const {
return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
}
__forceinline float align_time(float ct) const
{
//return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
float t0 = (ct-max_time_range.lower)/max_time_range.size();
float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
return t1*max_time_range.size()+max_time_range.lower;
}
/*! stream output */
friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
{
return cout << "PrimInfo { " <<
"object_range = " << pinfo.object_range <<
", time_range = " << pinfo.time_range <<
", time_segments = " << pinfo.num_time_segments <<
", geomBounds = " << pinfo.geomBounds <<
", centBounds = " << pinfo.centBounds <<
"}";
}
public:
range<size_t> object_range; //!< primitive range
size_t num_time_segments; //!< total number of time segments of all added primrefs
size_t max_num_time_segments; //!< maximum number of time segments of a primitive
BBox1f max_time_range; //!< time range of primitive with max_num_time_segments
BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
};
typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
struct SetMB : public PrimInfoMB
{
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
typedef mvector<PrimRefMB>* PrimRefVector;
__forceinline SetMB() {}
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
: PrimInfoMB(pinfo_i), prims(prims) {}
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
: PrimInfoMB(pinfo_i), prims(prims)
{
object_range = object_range_in;
time_range = intersect(time_range,time_range_in);
}
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
: PrimInfoMB(pinfo_i), prims(prims)
{
time_range = intersect(time_range,time_range_in);
}
void deterministic_order() const
{
/* required as parallel partition destroys original primitive order */
PrimRefMB* prim = prims->data();
std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
}
template<typename RecalculatePrimRef>
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
{
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
{
LBBox3fa cbounds(empty);
for (size_t j = r.begin(); j < r.end(); j++)
{
PrimRefMB& ref = (*prims)[j];
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
cbounds.extend(bn);
};
return cbounds;
};
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
reduce,
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
}
template<typename RecalculatePrimRef>
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
{
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
{
LBBox3fa cbounds(empty);
for (size_t j = r.begin(); j < r.end(); j++)
{
PrimRefMB& ref = (*prims)[j];
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
cbounds.extend(bn);
};
return cbounds;
};
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
reduce,
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
}
template<typename RecalculatePrimRef>
const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
{
auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
{
PrimInfoMB pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
PrimRefMB& ref = (*prims)[j];
PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
pinfo.add_primref(ref1);
};
return pinfo;
};
const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);
return SetMB(pinfo,prims,object_range,time_range);
}
public:
PrimRefVector prims;
};
//}
}

View File

@ -0,0 +1,210 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#pragma once
#include "primref_mb.h"
namespace embree
{
/*! stores bounding information for a set of primitives */
template<typename BBox>
class PrimInfoMBT : public CentGeom<BBox>
{
public:
using CentGeom<BBox>::geomBounds;
using CentGeom<BBox>::centBounds;
__forceinline PrimInfoMBT () {
}
__forceinline PrimInfoMBT (EmptyTy)
: CentGeom<BBox>(empty), object_range(0,0), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
__forceinline PrimInfoMBT (size_t begin, size_t end)
: CentGeom<BBox>(empty), object_range(begin,end), num_time_segments(0), max_num_time_segments(0), max_time_range(0.0f,1.0f), time_range(1.0f,0.0f) {}
template<typename PrimRef>
__forceinline void add_primref(const PrimRef& prim)
{
CentGeom<BBox>::extend_primref(prim);
time_range.extend(prim.time_range);
object_range._end++;
num_time_segments += prim.size();
if (max_num_time_segments < prim.totalTimeSegments()) {
max_num_time_segments = prim.totalTimeSegments();
max_time_range = prim.time_range;
}
}
__forceinline void merge(const PrimInfoMBT& other)
{
CentGeom<BBox>::merge(other);
time_range.extend(other.time_range);
object_range._begin += other.object_range.begin();
object_range._end += other.object_range.end();
num_time_segments += other.num_time_segments;
if (max_num_time_segments < other.max_num_time_segments) {
max_num_time_segments = other.max_num_time_segments;
max_time_range = other.max_time_range;
}
}
static __forceinline const PrimInfoMBT merge2(const PrimInfoMBT& a, const PrimInfoMBT& b) {
PrimInfoMBT r = a; r.merge(b); return r;
}
__forceinline size_t begin() const {
return object_range.begin();
}
__forceinline size_t end() const {
return object_range.end();
}
/*! returns the number of primitives */
__forceinline size_t size() const {
return object_range.size();
}
__forceinline float halfArea() const {
return time_range.size()*expectedApproxHalfArea(geomBounds);
}
__forceinline float leafSAH() const {
return time_range.size()*expectedApproxHalfArea(geomBounds)*float(num_time_segments);
}
__forceinline float leafSAH(size_t block_shift) const {
return time_range.size()*expectedApproxHalfArea(geomBounds)*float((num_time_segments+(size_t(1)<<block_shift)-1) >> block_shift);
}
__forceinline float align_time(float ct) const
{
//return roundf(ct * float(numTimeSegments)) / float(numTimeSegments);
float t0 = (ct-max_time_range.lower)/max_time_range.size();
float t1 = roundf(t0 * float(max_num_time_segments)) / float(max_num_time_segments);
return t1*max_time_range.size()+max_time_range.lower;
}
/*! stream output */
friend embree_ostream operator<<(embree_ostream cout, const PrimInfoMBT& pinfo)
{
return cout << "PrimInfo { " <<
"object_range = " << pinfo.object_range <<
", time_range = " << pinfo.time_range <<
", time_segments = " << pinfo.num_time_segments <<
", geomBounds = " << pinfo.geomBounds <<
", centBounds = " << pinfo.centBounds <<
"}";
}
public:
range<size_t> object_range; //!< primitive range
size_t num_time_segments; //!< total number of time segments of all added primrefs
size_t max_num_time_segments; //!< maximum number of time segments of a primitive
BBox1f max_time_range; //!< time range of primitive with max_num_time_segments
BBox1f time_range; //!< merged time range of primitives when merging prims, or additionally clipped with build time range when used in SetMB
};
typedef PrimInfoMBT<typename PrimRefMB::BBox> PrimInfoMB;
struct SetMB : public PrimInfoMB
{
static const size_t PARALLEL_THRESHOLD = 3 * 1024;
static const size_t PARALLEL_FIND_BLOCK_SIZE = 1024;
static const size_t PARALLEL_PARTITION_BLOCK_SIZE = 128;
typedef mvector<PrimRefMB>* PrimRefVector;
__forceinline SetMB() {}
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims)
: PrimInfoMB(pinfo_i), prims(prims) {}
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, range<size_t> object_range_in, BBox1f time_range_in)
: PrimInfoMB(pinfo_i), prims(prims)
{
object_range = object_range_in;
time_range = intersect(time_range,time_range_in);
}
__forceinline SetMB(const PrimInfoMB& pinfo_i, PrimRefVector prims, BBox1f time_range_in)
: PrimInfoMB(pinfo_i), prims(prims)
{
time_range = intersect(time_range,time_range_in);
}
void deterministic_order() const
{
/* required as parallel partition destroys original primitive order */
PrimRefMB* prim = prims->data();
std::sort(&prim[object_range.begin()],&prim[object_range.end()]);
}
template<typename RecalculatePrimRef>
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef) const
{
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
{
LBBox3fa cbounds(empty);
for (size_t j = r.begin(); j < r.end(); j++)
{
PrimRefMB& ref = (*prims)[j];
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range);
cbounds.extend(bn);
};
return cbounds;
};
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
reduce,
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
}
template<typename RecalculatePrimRef>
__forceinline LBBox3fa linearBounds(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
{
auto reduce = [&](const range<size_t>& r) -> LBBox3fa
{
LBBox3fa cbounds(empty);
for (size_t j = r.begin(); j < r.end(); j++)
{
PrimRefMB& ref = (*prims)[j];
const LBBox3fa bn = recalculatePrimRef.linearBounds(ref, time_range, space);
cbounds.extend(bn);
};
return cbounds;
};
return parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD, LBBox3fa(empty),
reduce,
[&](const LBBox3fa& b0, const LBBox3fa& b1) -> LBBox3fa { return embree::merge(b0, b1); });
}
template<typename RecalculatePrimRef>
const SetMB primInfo(const RecalculatePrimRef& recalculatePrimRef, const LinearSpace3fa& space) const
{
auto computePrimInfo = [&](const range<size_t>& r) -> PrimInfoMB
{
PrimInfoMB pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
PrimRefMB& ref = (*prims)[j];
PrimRefMB ref1 = recalculatePrimRef(ref,time_range,space);
pinfo.add_primref(ref1);
};
return pinfo;
};
const PrimInfoMB pinfo = parallel_reduce(object_range.begin(), object_range.end(), PARALLEL_FIND_BLOCK_SIZE, PARALLEL_THRESHOLD,
PrimInfoMB(empty), computePrimInfo, PrimInfoMB::merge2);
return SetMB(pinfo,prims,object_range,time_range);
}
public:
PrimRefVector prims;
};
//}
}

View File

@ -3,7 +3,7 @@
#pragma once
#include "default.h"
#include "../common/default.h"
namespace embree
{
@ -119,6 +119,7 @@ namespace embree
#endif
}
/************************************************************************************/
/************************************************************************************/
/************************************************************************************/

View File

@ -3,7 +3,7 @@
#pragma once
#include "default.h"
#include "../common/default.h"
#define MBLUR_BIN_LBBOX 1

View File

@ -55,6 +55,29 @@ namespace embree
return pinfo;
}
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, const size_t numPrimRefs, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor)
{
ParallelForForPrefixSumState<PrimInfo> pstate;
Scene::Iterator2 iter(scene,types,mblur);
/* first try */
progressMonitor(0);
pstate.init(iter,size_t(1024));
PrimInfo pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfo {
return mesh->createPrimRefArray(prims,sgrids,r,k,(unsigned)geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
/* if we need to filter out geometry, run again */
if (pinfo.size() != numPrimRefs)
{
progressMonitor(0);
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),(unsigned)geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
}
return pinfo;
}
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime)
{
ParallelForForPrefixSumState<PrimInfo> pstate;
@ -104,6 +127,32 @@ namespace embree
return pinfo;
}
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, const size_t numPrimRefs, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
{
ParallelForForPrefixSumState<PrimInfoMB> pstate;
Scene::Iterator2 iter(scene,types,true);
/* first try */
progressMonitor(0);
pstate.init(iter,size_t(1024));
PrimInfoMB pinfo = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID) -> PrimInfoMB {
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,k,(unsigned)geomID);
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
/* if we need to filter out geometry, run again */
if (pinfo.size() != numPrimRefs)
{
progressMonitor(0);
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](Geometry* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
}
/* the BVH starts with that time range, even though primitives might have smaller/larger time range */
pinfo.time_range = t0t1;
return pinfo;
}
template<typename Mesh>
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor)
{
@ -218,25 +267,7 @@ namespace embree
/* second run to fill primrefs and SubGridBuildData arrays */
pinfo = parallel_for_for_prefix_sum1( pstate, iter, PrimInfo(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfo& base) -> PrimInfo {
k = base.size();
size_t p_index = k;
PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j)) continue;
const GridMesh::Grid &g = mesh->grid(j);
for (unsigned int y=0; y<g.resY-1u; y+=2)
for (unsigned int x=0; x<g.resX-1u; x+=2)
{
BBox3fa bounds = empty;
if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
const PrimRef prim(bounds,(unsigned)geomID,(unsigned)p_index);
pinfo.add_center2(prim);
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
prims[p_index++] = prim;
}
}
return pinfo;
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
assert(pinfo.size() == numPrimitives);
return pinfo;
@ -269,31 +300,50 @@ namespace embree
prims.resize(numPrimitives);
/* second run to fill primrefs and SubGridBuildData arrays */
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo
{
size_t p_index = base.size();
PrimInfo pinfo(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j)) continue;
const GridMesh::Grid &g = mesh->grid(j);
for (unsigned int y=0; y<g.resY-1u; y+=2)
for (unsigned int x=0; x<g.resX-1u; x+=2)
{
BBox3fa bounds = empty;
if (!mesh->buildBounds(g,x,y,bounds)) continue; // get bounds of subgrid
const PrimRef prim(bounds,geomID_,unsigned(p_index));
pinfo.add_center2(prim);
sgrids[p_index] = SubGridBuildData(x | g.get3x3FlagsX(x), y | g.get3x3FlagsY(y), unsigned(j));
prims[p_index++] = prim;
}
}
return pinfo;
pinfo = parallel_prefix_sum( pstate, size_t(0), mesh->size(), size_t(1024), PrimInfo(empty), [&](const range<size_t>& r, const PrimInfo& base) -> PrimInfo {
return mesh->createPrimRefArray(prims,sgrids,r,base.size(),geomID_);
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
return pinfo;
}
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1)
{
/* first run to get #primitives */
ParallelForForPrefixSumState<PrimInfoMB> pstate;
Scene::Iterator<GridMesh,true> iter(scene);
pstate.init(iter,size_t(1024));
/* iterate over all meshes in the scene */
PrimInfoMB pinfoMB = parallel_for_for_prefix_sum0( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t /*geomID*/) -> PrimInfoMB {
PrimInfoMB pinfoMB(empty);
for (size_t j=r.begin(); j<r.end(); j++)
{
if (!mesh->valid(j, mesh->timeSegmentRange(t0t1))) continue;
LBBox3fa bounds(empty);
PrimInfoMB gridMB(0,mesh->getNumSubGrids(j));
pinfoMB.merge(gridMB);
}
return pinfoMB;
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
size_t numPrimitives = pinfoMB.size();
if (numPrimitives == 0) return pinfoMB;
/* resize arrays */
sgrids.resize(numPrimitives);
prims.resize(numPrimitives);
/* second run to fill primrefs and SubGridBuildData arrays */
pinfoMB = parallel_for_for_prefix_sum1( pstate, iter, PrimInfoMB(empty), [&](GridMesh* mesh, const range<size_t>& r, size_t k, size_t geomID, const PrimInfoMB& base) -> PrimInfoMB {
return mesh->createPrimRefMBArray(prims,sgrids,t0t1,r,base.size(),(unsigned)geomID);
}, [](const PrimInfoMB& a, const PrimInfoMB& b) -> PrimInfoMB { return PrimInfoMB::merge2(a,b); });
assert(pinfoMB.size() == numPrimitives);
pinfoMB.time_range = t0t1;
return pinfoMB;
}
#endif
// ====================================================================================================
@ -304,5 +354,6 @@ namespace embree
IF_ENABLED_QUADS(template size_t createMortonCodeArray<QuadMesh>(QuadMesh* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
IF_ENABLED_USER (template size_t createMortonCodeArray<UserGeometry>(UserGeometry* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
IF_ENABLED_INSTANCE (template size_t createMortonCodeArray<Instance>(Instance* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
IF_ENABLED_INSTANCE_ARRAY (template size_t createMortonCodeArray<InstanceArray>(InstanceArray* mesh COMMA mvector<BVHBuilderMorton::BuildPrim>& morton COMMA BuildProgressMonitor& progressMonitor));
}
}

View File

@ -4,9 +4,8 @@
#pragma once
#include "../common/scene.h"
#include "../common/primref.h"
#include "../common/primref_mb.h"
#include "priminfo.h"
#include "priminfo_mb.h"
#include "bvh_builder_morton.h"
namespace embree
@ -17,18 +16,22 @@ namespace embree
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor);
PrimInfo createPrimRefArray(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimitives, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor);
PrimInfo createPrimRefArrayMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor, size_t itime = 0);
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
PrimInfoMB createPrimRefArrayMSMBlur(Scene* scene, Geometry::GTypeMask types, size_t numPrimitives, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
template<typename Mesh>
size_t createMortonCodeArray(Mesh* mesh, mvector<BVHBuilderMorton::BuildPrim>& morton, BuildProgressMonitor& progressMonitor);
/* special variants for grids */
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids);
PrimInfo createPrimRefArrayGrids(Scene* scene, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids); // FIXME: remove
PrimInfo createPrimRefArrayGrids(GridMesh* mesh, mvector<PrimRef>& prims, mvector<SubGridBuildData>& sgrids);
PrimInfoMB createPrimRefArrayMSMBlurGrid(Scene* scene, mvector<PrimRefMB>& prims, mvector<SubGridBuildData>& sgrids, BuildProgressMonitor& progressMonitor, BBox1f t0t1 = BBox1f(0.0f,1.0f));
}
}

View File

@ -3,10 +3,12 @@
#pragma once
#include "../builders/primrefgen.h"
#include "../../common/algorithms/parallel_reduce.h"
#include "../../common/algorithms/parallel_sort.h"
#include "../builders/heuristic_spatial.h"
#include "../builders/splitter.h"
#include "../../common/algorithms/parallel_partition.h"
#include "../../common/algorithms/parallel_for_for.h"
#include "../../common/algorithms/parallel_for_for_prefix_sum.h"
@ -14,96 +16,46 @@
#define CHECK_PRESPLIT(x)
#define GRID_SIZE 1024
//#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 6
#define MAX_PRESPLITS_PER_PRIMITIVE_LOG 5
#define MAX_PRESPLITS_PER_PRIMITIVE (1<<MAX_PRESPLITS_PER_PRIMITIVE_LOG)
#define PRIORITY_CUTOFF_THRESHOLD 1.0f
//#define PRIORITY_CUTOFF_THRESHOLD 2.0f
#define PRIORITY_SPLIT_POS_WEIGHT 1.5f
namespace embree
{
namespace isa
{
struct PresplitItem
struct SplittingGrid
{
union {
float priority;
unsigned int data;
};
unsigned int index;
__forceinline operator unsigned() const
__forceinline SplittingGrid(const BBox3fa& bounds)
{
return reinterpret_cast<const unsigned&>(priority);
}
__forceinline bool operator < (const PresplitItem& item) const
{
return (priority < item.priority);
base = bounds.lower;
const Vec3fa diag = bounds.size();
extend = max(diag.x,max(diag.y,diag.z));
scale = extend == 0.0f ? 0.0f : GRID_SIZE / extend;
}
template<typename Mesh>
__forceinline static float compute_priority(const PrimRef &ref, Scene *scene, const Vec2i &mc)
{
const unsigned int geomID = ref.geomID();
const unsigned int primID = ref.primID();
const float area_aabb = area(ref.bounds());
const float area_prim = ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
assert(area_prim <= area_aabb);
//const float priority = powf((area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
const float priority = sqrtf(sqrtf( (area_aabb - area_prim) * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
assert(priority >= 0.0f && priority < FLT_LARGE);
return priority;
}
};
inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
return cout << "index " << item.index << " priority " << item.priority;
};
template<typename SplitterFactory>
void splitPrimitive(SplitterFactory &Splitter,
const PrimRef &prim,
const unsigned int geomID,
const unsigned int primID,
const unsigned int split_level,
const Vec3fa &grid_base,
const float grid_scale,
const float grid_extend,
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
unsigned int& numSubPrims)
{
assert(split_level <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
if (split_level == 0)
{
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
subPrims[numSubPrims++] = prim;
}
else
__forceinline bool split_pos(const PrimRef& prim, unsigned int& dim_o, float& fsplit_o) const
{
/* compute morton code */
const Vec3fa lower = prim.lower;
const Vec3fa upper = prim.upper;
const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
Vec3ia ilower(floor(glower));
Vec3ia iupper(floor(gupper));
/* this ignores dimensions that are empty */
iupper = (Vec3ia)(select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper)));
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
/* compute a morton code for the lower and upper grid coordinates. */
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
/* if all bits are equal then we cannot split */
if(unlikely(lower_code == upper_code))
{
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
subPrims[numSubPrims++] = prim;
return;
}
if (unlikely(lower_code == upper_code))
return false;
/* compute octree level and dimension to perform the split in */
const unsigned int diff = 31 - lzcnt(lower_code^upper_code);
@ -115,24 +67,165 @@ namespace embree
/* compute world space position of split */
const float inv_grid_size = 1.0f / GRID_SIZE;
const float fsplit = grid_base[dim] + isplit * inv_grid_size * grid_extend;
const float fsplit = base[dim] + isplit * inv_grid_size * extend;
assert(prim.lower[dim] <= fsplit && prim.upper[dim] >= fsplit);
assert(prim.lower[dim] <= fsplit &&
prim.upper[dim] >= fsplit);
dim_o = dim;
fsplit_o = fsplit;
return true;
}
__forceinline Vec2i computeMC(const PrimRef& ref) const
{
const Vec3fa lower = ref.lower;
const Vec3fa upper = ref.upper;
const Vec3fa glower = (lower-base)*Vec3fa(scale)+Vec3fa(0.2f);
const Vec3fa gupper = (upper-base)*Vec3fa(scale)-Vec3fa(0.2f);
Vec3ia ilower(floor(glower));
Vec3ia iupper(floor(gupper));
/* this ignores dimensions that are empty */
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
/* compute a morton code for the lower and upper grid coordinates. */
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
return Vec2i(lower_code,upper_code);
}
Vec3fa base;
float scale;
float extend;
};
struct PresplitItem
{
union {
float priority;
unsigned int data;
};
unsigned int index;
__forceinline operator unsigned() const {
return data;
}
template<typename ProjectedPrimitiveAreaFunc>
__forceinline static float compute_priority(const ProjectedPrimitiveAreaFunc& primitiveArea, const PrimRef &ref, const Vec2i &mc)
{
const float area_aabb = area(ref.bounds());
const float area_prim = primitiveArea(ref);
if (area_prim == 0.0f) return 0.0f;
const unsigned int diff = 31 - lzcnt(mc.x^mc.y);
//assert(area_prim <= area_aabb); // may trigger due to numerical issues
const float area_diff = max(0.0f, area_aabb - area_prim);
//const float priority = powf(area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff),1.0f/4.0f);
const float priority = sqrtf(sqrtf( area_diff * powf(PRIORITY_SPLIT_POS_WEIGHT,(float)diff) ));
//const float priority = sqrtf(sqrtf( area_diff ) );
//const float priority = sqrtfarea_diff;
//const float priority = area_diff; // 104 fps !!!!!!!!!!
//const float priority = 0.2f*area_aabb + 0.8f*area_diff; // 104 fps
//const float priority = area_aabb * max(area_aabb/area_prim,32.0f);
//const float priority = area_prim;
assert(priority >= 0.0f && priority < FLT_LARGE);
return priority;
}
};
inline std::ostream &operator<<(std::ostream &cout, const PresplitItem& item) {
return cout << "index " << item.index << " priority " << item.priority;
};
#if 1
template<typename Splitter>
void splitPrimitive(const Splitter& splitter,
const PrimRef& prim,
const unsigned int splitprims,
const SplittingGrid& grid,
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
unsigned int& numSubPrims)
{
assert(splitprims > 0 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
if (splitprims == 1)
{
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
subPrims[numSubPrims++] = prim;
}
else
{
unsigned int dim; float fsplit;
if (!grid.split_pos(prim, dim, fsplit))
{
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
subPrims[numSubPrims++] = prim;
return;
}
/* split primitive */
const auto splitter = Splitter(prim);
BBox3fa left,right;
splitter(prim.bounds(),dim,fsplit,left,right);
assert(!left.empty());
assert(!right.empty());
PrimRef left,right;
splitter(prim,dim,fsplit,left,right);
assert(!left.bounds().empty());
assert(!right.bounds().empty());
splitPrimitive(Splitter,PrimRef(left ,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
splitPrimitive(Splitter,PrimRef(right,geomID,primID),geomID,primID,split_level-1,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
const unsigned int splitprims_left = splitprims/2;
const unsigned int splitprims_right = splitprims - splitprims_left;
splitPrimitive(splitter,left,splitprims_left,grid,subPrims,numSubPrims);
splitPrimitive(splitter,right,splitprims_right,grid,subPrims,numSubPrims);
}
}
#else
template<typename Splitter>
void splitPrimitive(const Splitter& splitter,
const PrimRef& prim,
const unsigned int targetSubPrims,
const SplittingGrid& grid,
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
unsigned int& numSubPrims)
{
assert(targetSubPrims > 0 && targetSubPrims <= MAX_PRESPLITS_PER_PRIMITIVE);
auto compare = [] ( const PrimRef& a, const PrimRef& b ) {
return area(a.bounds()) < area(b.bounds());
};
subPrims[numSubPrims++] = prim;
while (numSubPrims < targetSubPrims)
{
/* get top heap element */
std::pop_heap(subPrims+0,subPrims+numSubPrims, compare);
PrimRef top = subPrims[--numSubPrims];
unsigned int dim; float fsplit;
if (!grid.split_pos(top, dim, fsplit))
{
assert(numSubPrims < MAX_PRESPLITS_PER_PRIMITIVE);
subPrims[numSubPrims++] = top;
return;
}
/* split primitive */
PrimRef left,right;
splitter(top,dim,fsplit,left,right);
assert(!left.bounds().empty());
assert(!right.bounds().empty());
subPrims[numSubPrims++] = left;
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
subPrims[numSubPrims++] = right;
std::push_heap(subPrims+0, subPrims+numSubPrims, compare);
}
}
#endif
#if !defined(RTHWIF_STANDALONE)
template<typename Mesh, typename SplitterFactory>
PrimInfo createPrimRefArray_presplit(Geometry* geometry, unsigned int geomID, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
@ -155,30 +248,181 @@ namespace embree
}
return pinfo;
}
#endif
__forceinline Vec2i computeMC(const Vec3fa &grid_base, const float grid_scale, const PrimRef &ref)
template<typename SplitPrimitiveFunc, typename ProjectedPrimitiveAreaFunc, typename PrimVector>
PrimInfo createPrimRefArray_presplit(size_t numPrimRefs,
PrimVector& prims,
const PrimInfo& pinfo,
const SplitPrimitiveFunc& splitPrimitive,
const ProjectedPrimitiveAreaFunc& primitiveArea)
{
const Vec3fa lower = ref.lower;
const Vec3fa upper = ref.upper;
const Vec3fa glower = (lower-grid_base)*Vec3fa(grid_scale)+Vec3fa(0.2f);
const Vec3fa gupper = (upper-grid_base)*Vec3fa(grid_scale)-Vec3fa(0.2f);
Vec3ia ilower(floor(glower));
Vec3ia iupper(floor(gupper));
static const size_t MIN_STEP_SIZE = 128;
/* this ignores dimensions that are empty */
iupper = (Vec3ia)select(vint4(glower) >= vint4(gupper),vint4(ilower),vint4(iupper));
/* use correct number of primitives */
size_t numPrimitives = pinfo.size();
const size_t numPrimitivesExt = prims.size();
const size_t numSplitPrimitivesBudget = numPrimitivesExt - numPrimitives;
/* compute a morton code for the lower and upper grid coordinates. */
const unsigned int lower_code = bitInterleave(ilower.x,ilower.y,ilower.z);
const unsigned int upper_code = bitInterleave(iupper.x,iupper.y,iupper.z);
return Vec2i(lower_code,upper_code);
/* allocate double buffer presplit items */
avector<PresplitItem> preSplitItem0(numPrimitivesExt);
avector<PresplitItem> preSplitItem1(numPrimitivesExt);
/* compute grid */
SplittingGrid grid(pinfo.geomBounds);
/* init presplit items and get total sum */
const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
float sum = 0.0f;
for (size_t i=r.begin(); i<r.end(); i++)
{
preSplitItem0[i].index = (unsigned int)i;
const Vec2i mc = grid.computeMC(prims[i]);
/* if all bits are equal then we cannot split */
preSplitItem0[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority(primitiveArea,prims[i],mc) : 0.0f;
/* FIXME: sum undeterministic */
sum += preSplitItem0[i].priority;
}
return sum;
},[](const float& a, const float& b) -> float { return a+b; });
/* compute number of splits per primitive */
const float inv_psum = 1.0f / psum;
parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
for (size_t i=r.begin(); i<r.end(); i++)
{
if (preSplitItem0[i].priority <= 0.0f) {
preSplitItem0[i].data = 1;
continue;
}
const float rel_p = (float)numSplitPrimitivesBudget * preSplitItem0[i].priority * inv_psum;
if (rel_p < 1) {
preSplitItem0[i].data = 1;
continue;
}
//preSplitItem0[i].data = max(min(ceilf(rel_p),(float)MAX_PRESPLITS_PER_PRIMITIVE),1.0f);
preSplitItem0[i].data = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
preSplitItem0[i].data = 1 << preSplitItem0[i].data;
assert(preSplitItem0[i].data <= MAX_PRESPLITS_PER_PRIMITIVE);
}
});
auto isLeft = [&] (const PresplitItem &ref) { return ref.data <= 1; };
size_t center = parallel_partitioning(preSplitItem0.data(),0,numPrimitives,isLeft,1024);
assert(center <= numPrimitives);
/* anything to split ? */
if (center >= numPrimitives)
return pinfo;
size_t numPrimitivesToSplit = numPrimitives - center;
assert(preSplitItem0[center].data >= 1.0f);
/* sort presplit items in ascending order */
radix_sort_u32(preSplitItem0.data() + center,preSplitItem1.data() + center,numPrimitivesToSplit,1024);
CHECK_PRESPLIT(
parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
for (size_t i=r.begin(); i<r.end(); i++)
assert(preSplitItem0[i-1].data <= preSplitItem0[i].data);
});
);
unsigned int* primOffset0 = (unsigned int*)preSplitItem1.data();
unsigned int* primOffset1 = (unsigned int*)preSplitItem1.data() + numPrimitivesToSplit;
/* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
size_t sum = 0;
for (size_t i=t.begin(); i<t.end(); i++)
{
const unsigned int primrefID = preSplitItem0[i].index;
const unsigned int splitprims = preSplitItem0[i].data;
assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
unsigned int numSubPrims = 0;
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
assert(numSubPrims);
numSubPrims--; // can reuse slot
sum+=numSubPrims;
preSplitItem0[i].data = (numSubPrims << 16) | splitprims;
primOffset0[i-center] = numSubPrims;
}
return sum;
},[](const size_t& a, const size_t& b) -> size_t { return a+b; });
/* if we are over budget, need to shrink the range */
if (totalNumSubPrims > numSplitPrimitivesBudget)
{
size_t new_center = numPrimitives-1;
size_t sum = 0;
for (;new_center>=center;new_center--)
{
const unsigned int numSubPrims = preSplitItem0[new_center].data >> 16;
if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
sum += numSubPrims;
}
new_center++;
primOffset0 += new_center - center;
numPrimitivesToSplit -= new_center - center;
center = new_center;
assert(numPrimitivesToSplit == (numPrimitives - center));
}
/* parallel prefix sum to compute offsets for storing sub-primitives */
const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
assert(numPrimitives+offset <= numPrimitivesExt);
/* iterate over range, and split primitives into sub primitives and append them to prims array */
parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
for (size_t j=rn.begin(); j<rn.end(); j++)
{
const unsigned int primrefID = preSplitItem0[j].index;
const unsigned int splitprims = preSplitItem0[j].data & 0xFFFF;
assert(splitprims >= 1 && splitprims <= MAX_PRESPLITS_PER_PRIMITIVE);
unsigned int numSubPrims = 0;
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
splitPrimitive(prims[primrefID],splitprims,grid,subPrims,numSubPrims);
const unsigned int numSubPrimsExpected MAYBE_UNUSED = preSplitItem0[j].data >> 16;
assert(numSubPrims-1 == numSubPrimsExpected);
const size_t newID = numPrimitives + primOffset1[j-center];
assert(newID+numSubPrims-1 <= numPrimitivesExt);
prims[primrefID] = subPrims[0];
for (size_t i=1;i<numSubPrims;i++)
prims[newID+i-1] = subPrims[i];
}
});
numPrimitives += offset;
/* recompute centroid bounding boxes */
const PrimInfo pinfo1 = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
PrimInfo p(empty);
for (size_t j=r.begin(); j<r.end(); j++)
p.add_center2(prims[j]);
return p;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
assert(pinfo1.size() == numPrimitives);
return pinfo1;
}
#if !defined(RTHWIF_STANDALONE)
template<typename Mesh, typename SplitterFactory>
PrimInfo createPrimRefArray_presplit(Scene* scene, Geometry::GTypeMask types, bool mblur, size_t numPrimRefs, mvector<PrimRef>& prims, BuildProgressMonitor& progressMonitor)
{
static const size_t MIN_STEP_SIZE = 128;
ParallelForForPrefixSumState<PrimInfo> pstate;
Scene::Iterator2 iter(scene,types,mblur);
@ -198,179 +442,27 @@ namespace embree
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
}
/* use correct number of primitives */
size_t numPrimitives = pinfo.size();
const size_t alloc_numPrimitives = prims.size();
const size_t numSplitPrimitivesBudget = alloc_numPrimitives - numPrimitives;
/* set up primitive splitter */
SplitterFactory Splitter(scene);
DBG_PRESPLIT(
const size_t org_numPrimitives = pinfo.size();
PRINT(numPrimitives);
PRINT(alloc_numPrimitives);
PRINT(numSplitPrimitivesBudget);
);
/* allocate double buffer presplit items */
const size_t presplit_allocation_size = sizeof(PresplitItem)*alloc_numPrimitives;
PresplitItem *presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
PresplitItem *tmp_presplitItem = (PresplitItem*)alignedMalloc(presplit_allocation_size,64);
/* compute grid */
const Vec3fa grid_base = pinfo.geomBounds.lower;
const Vec3fa grid_diag = pinfo.geomBounds.size();
const float grid_extend = max(grid_diag.x,max(grid_diag.y,grid_diag.z));
const float grid_scale = grid_extend == 0.0f ? 0.0f : GRID_SIZE / grid_extend;
/* init presplit items and get total sum */
const float psum = parallel_reduce( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), 0.0f, [&](const range<size_t>& r) -> float {
float sum = 0.0f;
for (size_t i=r.begin(); i<r.end(); i++)
auto split_primitive = [&] (const PrimRef &prim,
const unsigned int splitprims,
const SplittingGrid& grid,
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE],
unsigned int& numSubPrims)
{
presplitItem[i].index = (unsigned int)i;
const Vec2i mc = computeMC(grid_base,grid_scale,prims[i]);
/* if all bits are equal then we cannot split */
presplitItem[i].priority = (mc.x != mc.y) ? PresplitItem::compute_priority<Mesh>(prims[i],scene,mc) : 0.0f;
/* FIXME: sum undeterministic */
sum += presplitItem[i].priority;
}
return sum;
},[](const float& a, const float& b) -> float { return a+b; });
/* compute number of splits per primitive */
const float inv_psum = 1.0f / psum;
parallel_for( size_t(0), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
for (size_t i=r.begin(); i<r.end(); i++)
{
if (presplitItem[i].priority > 0.0f)
{
const float rel_p = (float)numSplitPrimitivesBudget * presplitItem[i].priority * inv_psum;
if (rel_p >= PRIORITY_CUTOFF_THRESHOLD) // need at least a split budget that generates two sub-prims
{
presplitItem[i].priority = max(min(ceilf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG),1.0f);
//presplitItem[i].priority = min(floorf(logf(rel_p)/logf(2.0f)),(float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
assert(presplitItem[i].priority >= 0.0f && presplitItem[i].priority <= (float)MAX_PRESPLITS_PER_PRIMITIVE_LOG);
}
else
presplitItem[i].priority = 0.0f;
}
}
});
auto isLeft = [&] (const PresplitItem &ref) { return ref.priority < PRIORITY_CUTOFF_THRESHOLD; };
size_t center = parallel_partitioning(presplitItem,0,numPrimitives,isLeft,1024);
/* anything to split ? */
if (center < numPrimitives)
{
size_t numPrimitivesToSplit = numPrimitives - center;
assert(presplitItem[center].priority >= 1.0f);
/* sort presplit items in ascending order */
radix_sort_u32(presplitItem + center,tmp_presplitItem + center,numPrimitivesToSplit,1024);
CHECK_PRESPLIT(
parallel_for( size_t(center+1), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& r) -> void {
for (size_t i=r.begin(); i<r.end(); i++)
assert(presplitItem[i-1].priority <= presplitItem[i].priority);
});
);
unsigned int* primOffset0 = (unsigned int*)tmp_presplitItem;
unsigned int* primOffset1 = (unsigned int*)tmp_presplitItem + numPrimitivesToSplit;
/* compute actual number of sub-primitives generated within the [center;numPrimitives-1] range */
const size_t totalNumSubPrims = parallel_reduce( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), size_t(0), [&](const range<size_t>& t) -> size_t {
size_t sum = 0;
for (size_t i=t.begin(); i<t.end(); i++)
{
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
assert(presplitItem[i].priority >= 1.0f);
const unsigned int primrefID = presplitItem[i].index;
const float prio = presplitItem[i].priority;
const unsigned int geomID = prims[primrefID].geomID();
const unsigned int primID = prims[primrefID].primID();
const unsigned int split_levels = (unsigned int)prio;
unsigned int numSubPrims = 0;
splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
assert(numSubPrims);
numSubPrims--; // can reuse slot
sum+=numSubPrims;
presplitItem[i].data = (numSubPrims << MAX_PRESPLITS_PER_PRIMITIVE_LOG) | split_levels;
primOffset0[i-center] = numSubPrims;
}
return sum;
},[](const size_t& a, const size_t& b) -> size_t { return a+b; });
/* if we are over budget, need to shrink the range */
if (totalNumSubPrims > numSplitPrimitivesBudget)
{
size_t new_center = numPrimitives-1;
size_t sum = 0;
for (;new_center>=center;new_center--)
{
const unsigned int numSubPrims = presplitItem[new_center].data >> MAX_PRESPLITS_PER_PRIMITIVE_LOG;
if (unlikely(sum + numSubPrims >= numSplitPrimitivesBudget)) break;
sum += numSubPrims;
}
new_center++;
primOffset0 += new_center - center;
numPrimitivesToSplit -= new_center - center;
center = new_center;
assert(numPrimitivesToSplit == (numPrimitives - center));
}
/* parallel prefix sum to compute offsets for storing sub-primitives */
const unsigned int offset = parallel_prefix_sum(primOffset0,primOffset1,numPrimitivesToSplit,(unsigned int)0,std::plus<unsigned int>());
assert(numPrimitives+offset <= alloc_numPrimitives);
/* iterate over range, and split primitives into sub primitives and append them to prims array */
parallel_for( size_t(center), numPrimitives, size_t(MIN_STEP_SIZE), [&](const range<size_t>& rn) -> void {
for (size_t j=rn.begin(); j<rn.end(); j++)
{
PrimRef subPrims[MAX_PRESPLITS_PER_PRIMITIVE];
const unsigned int primrefID = presplitItem[j].index;
const unsigned int geomID = prims[primrefID].geomID();
const unsigned int primID = prims[primrefID].primID();
const unsigned int split_levels = presplitItem[j].data & ((unsigned int)(1 << MAX_PRESPLITS_PER_PRIMITIVE_LOG)-1);
assert(split_levels);
assert(split_levels <= MAX_PRESPLITS_PER_PRIMITIVE_LOG);
unsigned int numSubPrims = 0;
splitPrimitive(Splitter,prims[primrefID],geomID,primID,split_levels,grid_base,grid_scale,grid_extend,subPrims,numSubPrims);
const size_t newID = numPrimitives + primOffset1[j-center];
assert(newID+numSubPrims-1 <= alloc_numPrimitives);
prims[primrefID] = subPrims[0];
for (size_t i=1;i<numSubPrims;i++)
prims[newID+i-1] = subPrims[i];
}
});
numPrimitives += offset;
DBG_PRESPLIT(
PRINT(pinfo.size());
PRINT(numPrimitives);
PRINT((float)numPrimitives/org_numPrimitives));
}
/* recompute centroid bounding boxes */
pinfo = parallel_reduce(size_t(0),numPrimitives,size_t(MIN_STEP_SIZE),PrimInfo(empty),[&] (const range<size_t>& r) -> PrimInfo {
PrimInfo p(empty);
for (size_t j=r.begin(); j<r.end(); j++)
p.add_center2(prims[j]);
return p;
}, [](const PrimInfo& a, const PrimInfo& b) -> PrimInfo { return PrimInfo::merge(a,b); });
assert(pinfo.size() == numPrimitives);
/* free double buffer presplit items */
alignedFree(tmp_presplitItem);
alignedFree(presplitItem);
return pinfo;
const auto splitter = Splitter(prim);
splitPrimitive(splitter,prim,splitprims,grid,subPrims,numSubPrims);
};
auto primitiveArea = [&] (const PrimRef &ref) {
const unsigned int geomID = ref.geomID();
const unsigned int primID = ref.primID();
return ((Mesh*)scene->get(geomID))->projectedPrimitiveArea(primID);
};
return createPrimRefArray_presplit(numPrimRefs,prims,pinfo,split_primitive,primitiveArea);
}
#endif
}
}

View File

@ -3,13 +3,51 @@
#pragma once
#if !defined(RTHWIF_STANDALONE)
#include "../common/scene.h"
#include "../common/primref.h"
#endif
#include "../builders/primref.h"
namespace embree
{
namespace isa
{
template<size_t N>
__forceinline void splitPolygon(const BBox3fa& bounds,
const size_t dim,
const float pos,
const Vec3fa (&v)[N+1],
BBox3fa& left_o,
BBox3fa& right_o)
{
BBox3fa left = empty, right = empty;
/* clip triangle to left and right box by processing all edges */
for (size_t i=0; i<N; i++)
{
const Vec3fa &v0 = v[i];
const Vec3fa &v1 = v[i+1];
const float v0d = v0[dim];
const float v1d = v1[dim];
if (v0d <= pos) left. extend(v0); // this point is on left side
if (v0d >= pos) right.extend(v0); // this point is on right side
if ((v0d < pos && pos < v1d) || (v1d < pos && pos < v0d)) // the edge crosses the splitting location
{
assert((v1d-v0d) != 0.0f);
const float inv_length = 1.0f/(v1d-v0d);
const Vec3fa c = madd(Vec3fa((pos-v0d)*inv_length),v1-v0,v0);
left.extend(c);
right.extend(c);
}
}
/* clip against current bounds */
left_o = intersect(left,bounds);
right_o = intersect(right,bounds);
}
template<size_t N>
__forceinline void splitPolygon(const BBox3fa& bounds,
const size_t dim,
@ -79,6 +117,8 @@ namespace embree
new (&right_o) PrimRef(intersect(right,prim.bounds()),prim.geomID(), prim.primID());
}
#if !defined(RTHWIF_STANDALONE)
struct TriangleSplitter
{
__forceinline TriangleSplitter(const Scene* scene, const PrimRef& prim)
@ -173,6 +213,13 @@ namespace embree
__forceinline DummySplitter(const Scene* scene, const PrimRef& prim)
{
}
__forceinline void operator() (const PrimRef& prim, const size_t dim, const float pos, PrimRef& left_o, PrimRef& right_o) const {
}
__forceinline void operator() (const BBox3fa& prim, const size_t dim, const float pos, BBox3fa& left_o, BBox3fa& right_o) const {
}
};
struct DummySplitterFactory
@ -187,7 +234,7 @@ namespace embree
private:
const Scene* scene;
};
#endif
}
}

View File

@ -17,6 +17,7 @@
#include "../geometry/subdivpatch1.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"
#include "../common/accelinstance.h"
@ -66,6 +67,9 @@ namespace embree
DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayIntersector1);
DECLARE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayMBIntersector1);
DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
DECLARE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
@ -104,6 +108,9 @@ namespace embree
DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayIntersector4Chunk);
DECLARE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayMBIntersector4Chunk);
DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
@ -142,6 +149,9 @@ namespace embree
DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayIntersector8Chunk);
DECLARE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayMBIntersector8Chunk);
DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
@ -180,33 +190,20 @@ namespace embree
DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayIntersector16Chunk);
DECLARE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayMBIntersector16Chunk);
DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4IntersectorStreamPacketFallback);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoeller);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4IntersectorStreamMoellerNoFilter);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamMoeller);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4vIntersectorStreamPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Triangle4iIntersectorStreamPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoeller);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamMoellerNoFilter);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamMoeller);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4vIntersectorStreamPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4Quad4iIntersectorStreamPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream);
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4MeshSAH,void* COMMA Scene* COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4vMeshSAH,void* COMMA Scene* COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelTriangle4iMeshSAH,void* COMMA Scene* COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4Curve4iBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
@ -238,6 +235,9 @@ namespace embree
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DECLARE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DECLARE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
@ -266,6 +266,7 @@ namespace embree
IF_ENABLED_QUADS (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelQuadMeshSAH));
IF_ENABLED_USER (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelVirtualSAH));
IF_ENABLED_INSTANCE (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelInstanceSAH));
IF_ENABLED_INSTANCE_ARRAY (SELECT_SYMBOL_DEFAULT_AVX(features,BVH4BuilderTwoLevelInstanceArraySAH));
IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4vBuilder_OBB_New));
IF_ENABLED_CURVES_OR_POINTS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4Curve4iBuilder_OBB_New));
@ -297,6 +298,9 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceSceneBuilderSAH));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceMBSceneBuilderSAH));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceArraySceneBuilderSAH));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4InstanceArrayMBSceneBuilderSAH));
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridSceneBuilderSAH));
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_AVX(features,BVH4GridMBSceneBuilderSAH));
@ -349,6 +353,9 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersector1));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector1));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceArrayIntersector1));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceArrayMBIntersector1));
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector1Moeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridMBIntersector1Moeller))
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector1Pluecker));
@ -390,6 +397,9 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersector4Chunk));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector4Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceArrayIntersector4Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceArrayMBIntersector4Chunk));
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersector4HybridMoeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4GridIntersector4HybridMoeller));
@ -431,6 +441,9 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceIntersector8Chunk));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceMBIntersector8Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceArrayIntersector8Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4InstanceArrayMBIntersector8Chunk));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridIntersector8HybridMoeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridMBIntersector8HybridMoeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH4GridIntersector8HybridPluecker));
@ -470,29 +483,13 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceIntersector16Chunk));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceMBIntersector16Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceArrayIntersector16Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX512(features,BVH4InstanceArrayMBIntersector16Chunk));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridIntersector16HybridMoeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridMBIntersector16HybridMoeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH4GridIntersector16HybridPluecker));
/* select stream intersectors */
SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4IntersectorStreamPacketFallback);
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4IntersectorStreamMoeller));
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4IntersectorStreamMoellerNoFilter));
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersectorStreamMoeller));
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4vIntersectorStreamPluecker));
IF_ENABLED_TRIS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Triangle4iIntersectorStreamPluecker));
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamMoeller));
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamMoellerNoFilter));
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersectorStreamMoeller));
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4vIntersectorStreamPluecker));
IF_ENABLED_QUADS(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4Quad4iIntersectorStreamPluecker));
IF_ENABLED_USER(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4VirtualIntersectorStream));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_DEFAULT_SSE42_AVX_AVX2_AVX512(features,BVH4InstanceIntersectorStream));
#endif
}
@ -509,7 +506,6 @@ namespace embree
intersectors.intersector4 = BVH4OBBVirtualCurveIntersector4Hybrid();
intersectors.intersector8 = BVH4OBBVirtualCurveIntersector8Hybrid();
intersectors.intersector16 = BVH4OBBVirtualCurveIntersector16Hybrid();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -523,7 +519,6 @@ namespace embree
intersectors.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4Hybrid();
intersectors.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8Hybrid();
intersectors.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16Hybrid();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -545,7 +540,6 @@ namespace embree
intersectors.intersector4 = BVH4OBBVirtualCurveIntersector4HybridMB();
intersectors.intersector8 = BVH4OBBVirtualCurveIntersector8HybridMB();
intersectors.intersector16 = BVH4OBBVirtualCurveIntersector16HybridMB();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -559,7 +553,6 @@ namespace embree
intersectors.intersector4 = BVH4OBBVirtualCurveIntersectorRobust4HybridMB();
intersectors.intersector8 = BVH4OBBVirtualCurveIntersectorRobust8HybridMB();
intersectors.intersector16 = BVH4OBBVirtualCurveIntersectorRobust16HybridMB();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -581,8 +574,6 @@ namespace embree
intersectors.intersector8_nofilter = BVH4Triangle4Intersector8HybridMoellerNoFilter();
intersectors.intersector16_filter = BVH4Triangle4Intersector16HybridMoeller();
intersectors.intersector16_nofilter = BVH4Triangle4Intersector16HybridMoellerNoFilter();
intersectors.intersectorN_filter = BVH4Triangle4IntersectorStreamMoeller();
intersectors.intersectorN_nofilter = BVH4Triangle4IntersectorStreamMoellerNoFilter();
#endif
return intersectors;
}
@ -597,7 +588,6 @@ namespace embree
intersectors.intersector4 = BVH4Triangle4vIntersector4HybridPluecker();
intersectors.intersector8 = BVH4Triangle4vIntersector8HybridPluecker();
intersectors.intersector16 = BVH4Triangle4vIntersector16HybridPluecker();
intersectors.intersectorN = BVH4Triangle4vIntersectorStreamPluecker();
#endif
return intersectors;
}
@ -614,7 +604,6 @@ namespace embree
intersectors.intersector4 = BVH4Triangle4iIntersector4HybridMoeller();
intersectors.intersector8 = BVH4Triangle4iIntersector8HybridMoeller();
intersectors.intersector16 = BVH4Triangle4iIntersector16HybridMoeller();
intersectors.intersectorN = BVH4Triangle4iIntersectorStreamMoeller();
#endif
return intersectors;
}
@ -627,7 +616,6 @@ namespace embree
intersectors.intersector4 = BVH4Triangle4iIntersector4HybridPluecker();
intersectors.intersector8 = BVH4Triangle4iIntersector8HybridPluecker();
intersectors.intersector16 = BVH4Triangle4iIntersector16HybridPluecker();
intersectors.intersectorN = BVH4Triangle4iIntersectorStreamPluecker();
#endif
return intersectors;
}
@ -647,7 +635,6 @@ namespace embree
intersectors.intersector4 = BVH4Triangle4vMBIntersector4HybridMoeller();
intersectors.intersector8 = BVH4Triangle4vMBIntersector8HybridMoeller();
intersectors.intersector16 = BVH4Triangle4vMBIntersector16HybridMoeller();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -660,7 +647,6 @@ namespace embree
intersectors.intersector4 = BVH4Triangle4vMBIntersector4HybridPluecker();
intersectors.intersector8 = BVH4Triangle4vMBIntersector8HybridPluecker();
intersectors.intersector16 = BVH4Triangle4vMBIntersector16HybridPluecker();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -680,7 +666,6 @@ namespace embree
intersectors.intersector4 = BVH4Triangle4iMBIntersector4HybridMoeller();
intersectors.intersector8 = BVH4Triangle4iMBIntersector8HybridMoeller();
intersectors.intersector16 = BVH4Triangle4iMBIntersector16HybridMoeller();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -693,7 +678,6 @@ namespace embree
intersectors.intersector4 = BVH4Triangle4iMBIntersector4HybridPluecker();
intersectors.intersector8 = BVH4Triangle4iMBIntersector8HybridPluecker();
intersectors.intersector16 = BVH4Triangle4iMBIntersector16HybridPluecker();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -716,8 +700,6 @@ namespace embree
intersectors.intersector8_nofilter = BVH4Quad4vIntersector8HybridMoellerNoFilter();
intersectors.intersector16_filter = BVH4Quad4vIntersector16HybridMoeller();
intersectors.intersector16_nofilter = BVH4Quad4vIntersector16HybridMoellerNoFilter();
intersectors.intersectorN_filter = BVH4Quad4vIntersectorStreamMoeller();
intersectors.intersectorN_nofilter = BVH4Quad4vIntersectorStreamMoellerNoFilter();
#endif
return intersectors;
}
@ -730,7 +712,6 @@ namespace embree
intersectors.intersector4 = BVH4Quad4vIntersector4HybridPluecker();
intersectors.intersector8 = BVH4Quad4vIntersector8HybridPluecker();
intersectors.intersector16 = BVH4Quad4vIntersector16HybridPluecker();
intersectors.intersectorN = BVH4Quad4vIntersectorStreamPluecker();
#endif
return intersectors;
}
@ -750,7 +731,6 @@ namespace embree
intersectors.intersector4 = BVH4Quad4iIntersector4HybridMoeller();
intersectors.intersector8 = BVH4Quad4iIntersector8HybridMoeller();
intersectors.intersector16= BVH4Quad4iIntersector16HybridMoeller();
intersectors.intersectorN = BVH4Quad4iIntersectorStreamMoeller();
#endif
return intersectors;
}
@ -763,7 +743,6 @@ namespace embree
intersectors.intersector4 = BVH4Quad4iIntersector4HybridPluecker();
intersectors.intersector8 = BVH4Quad4iIntersector8HybridPluecker();
intersectors.intersector16= BVH4Quad4iIntersector16HybridPluecker();
intersectors.intersectorN = BVH4Quad4iIntersectorStreamPluecker();
#endif
return intersectors;
}
@ -783,7 +762,6 @@ namespace embree
intersectors.intersector4 = BVH4Quad4iMBIntersector4HybridMoeller();
intersectors.intersector8 = BVH4Quad4iMBIntersector8HybridMoeller();
intersectors.intersector16= BVH4Quad4iMBIntersector16HybridMoeller();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -796,7 +774,6 @@ namespace embree
intersectors.intersector4 = BVH4Quad4iMBIntersector4HybridPluecker();
intersectors.intersector8 = BVH4Quad4iMBIntersector8HybridPluecker();
intersectors.intersector16= BVH4Quad4iMBIntersector16HybridPluecker();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -829,7 +806,6 @@ namespace embree
intersectors.intersector4 = BVH4VirtualIntersector4Chunk();
intersectors.intersector8 = BVH4VirtualIntersector8Chunk();
intersectors.intersector16 = BVH4VirtualIntersector16Chunk();
intersectors.intersectorN = BVH4VirtualIntersectorStream();
#endif
intersectors.collider = BVH4ColliderUserGeom();
return intersectors;
@ -844,7 +820,6 @@ namespace embree
intersectors.intersector4 = BVH4VirtualMBIntersector4Chunk();
intersectors.intersector8 = BVH4VirtualMBIntersector8Chunk();
intersectors.intersector16 = BVH4VirtualMBIntersector16Chunk();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -858,7 +833,6 @@ namespace embree
intersectors.intersector4 = BVH4InstanceIntersector4Chunk();
intersectors.intersector8 = BVH4InstanceIntersector8Chunk();
intersectors.intersector16 = BVH4InstanceIntersector16Chunk();
intersectors.intersectorN = BVH4InstanceIntersectorStream();
#endif
return intersectors;
}
@ -872,7 +846,32 @@ namespace embree
intersectors.intersector4 = BVH4InstanceMBIntersector4Chunk();
intersectors.intersector8 = BVH4InstanceMBIntersector8Chunk();
intersectors.intersector16 = BVH4InstanceMBIntersector16Chunk();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
Accel::Intersectors BVH4Factory::BVH4InstanceArrayIntersectors(BVH4* bvh)
{
Accel::Intersectors intersectors;
intersectors.ptr = bvh;
intersectors.intersector1 = BVH4InstanceArrayIntersector1();
#if defined (EMBREE_RAY_PACKETS)
intersectors.intersector4 = BVH4InstanceArrayIntersector4Chunk();
intersectors.intersector8 = BVH4InstanceArrayIntersector8Chunk();
intersectors.intersector16 = BVH4InstanceArrayIntersector16Chunk();
#endif
return intersectors;
}
Accel::Intersectors BVH4Factory::BVH4InstanceArrayMBIntersectors(BVH4* bvh)
{
Accel::Intersectors intersectors;
intersectors.ptr = bvh;
intersectors.intersector1 = BVH4InstanceArrayMBIntersector1();
#if defined (EMBREE_RAY_PACKETS)
intersectors.intersector4 = BVH4InstanceArrayMBIntersector4Chunk();
intersectors.intersector8 = BVH4InstanceArrayMBIntersector8Chunk();
intersectors.intersector16 = BVH4InstanceArrayMBIntersector16Chunk();
#endif
return intersectors;
}
@ -886,7 +885,6 @@ namespace embree
intersectors.intersector4 = BVH4SubdivPatch1Intersector4();
intersectors.intersector8 = BVH4SubdivPatch1Intersector8();
intersectors.intersector16 = BVH4SubdivPatch1Intersector16();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -900,7 +898,6 @@ namespace embree
intersectors.intersector4 = BVH4SubdivPatch1MBIntersector4();
intersectors.intersector8 = BVH4SubdivPatch1MBIntersector8();
intersectors.intersector16 = BVH4SubdivPatch1MBIntersector16();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -1255,6 +1252,35 @@ namespace embree
return new AccelInstance(accel,builder,intersectors);
}
Accel* BVH4Factory::BVH4InstanceArray(Scene* scene, BuildVariant bvariant)
{
BVH4* accel = new BVH4(InstanceArrayPrimitive::type,scene);
Accel::Intersectors intersectors = BVH4InstanceArrayIntersectors(accel);
auto gtype = Geometry::MTY_INSTANCE_ARRAY;
Builder* builder = nullptr;
if (scene->device->object_builder == "default") {
switch (bvariant) {
case BuildVariant::STATIC : builder = BVH4InstanceArraySceneBuilderSAH(accel,scene,gtype); break;
case BuildVariant::DYNAMIC : builder = BVH4BuilderTwoLevelInstanceArraySAH(accel,scene,gtype,false); break;
case BuildVariant::HIGH_QUALITY: assert(false); break;
}
}
else if (scene->device->object_builder == "sah") { builder = BVH4InstanceArraySceneBuilderSAH(accel,scene,gtype); }
else if (scene->device->object_builder == "dynamic") { builder = BVH4BuilderTwoLevelInstanceArraySAH(accel,scene,gtype,false); }
else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH4<Object>");
return new AccelInstance(accel,builder,intersectors);
}
Accel* BVH4Factory::BVH4InstanceArrayMB(Scene* scene)
{
BVH4* accel = new BVH4(InstanceArrayPrimitive::type,scene);
Accel::Intersectors intersectors = BVH4InstanceArrayMBIntersectors(accel);
Builder* builder = BVH4InstanceArrayMBSceneBuilderSAH(accel,scene,Geometry::MTY_INSTANCE_ARRAY);
return new AccelInstance(accel,builder,intersectors);
}
Accel::Intersectors BVH4Factory::BVH4GridIntersectors(BVH4* bvh, IntersectVariant ivariant)
{
Accel::Intersectors intersectors;
@ -1266,7 +1292,6 @@ namespace embree
intersectors.intersector4 = BVH4GridIntersector4HybridMoeller();
intersectors.intersector8 = BVH4GridIntersector8HybridMoeller();
intersectors.intersector16 = BVH4GridIntersector16HybridMoeller();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
}
else /* if (ivariant == IntersectVariant::ROBUST) */
@ -1276,7 +1301,6 @@ namespace embree
intersectors.intersector4 = BVH4GridIntersector4HybridPluecker();
intersectors.intersector8 = BVH4GridIntersector8HybridPluecker();
intersectors.intersector16 = BVH4GridIntersector16HybridPluecker();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
}
return intersectors;
@ -1291,7 +1315,6 @@ namespace embree
intersectors.intersector4 = BVH4GridMBIntersector4HybridMoeller();
intersectors.intersector8 = BVH4GridMBIntersector8HybridMoeller();
intersectors.intersector16 = BVH4GridMBIntersector16HybridMoeller();
intersectors.intersectorN = BVH4IntersectorStreamPacketFallback();
#endif
return intersectors;
}

View File

@ -48,6 +48,9 @@ namespace embree
Accel* BVH4Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH4InstanceMB(Scene* scene, bool isExpensive);
Accel* BVH4InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH4InstanceArrayMB(Scene* scene);
Accel* BVH4Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH4GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
@ -78,6 +81,9 @@ namespace embree
Accel::Intersectors BVH4InstanceIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceMBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceArrayIntersectors(BVH4* bvh);
Accel::Intersectors BVH4InstanceArrayMBIntersectors(BVH4* bvh);
Accel::Intersectors BVH4SubdivPatch1Intersectors(BVH4* bvh);
Accel::Intersectors BVH4SubdivPatch1MBIntersectors(BVH4* bvh);
@ -123,6 +129,9 @@ namespace embree
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4InstanceArrayMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH4GridIntersector1Pluecker);
@ -161,6 +170,9 @@ namespace embree
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4InstanceArrayMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridMBIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH4GridIntersector4HybridPluecker);
@ -201,6 +213,9 @@ namespace embree
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4InstanceArrayMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridMBIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH4GridIntersector8HybridPluecker);
@ -241,30 +256,13 @@ namespace embree
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4InstanceArrayMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridMBIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH4GridIntersector16HybridPluecker);
// ==============
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4IntersectorStreamPacketFallback);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoeller);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4IntersectorStreamMoellerNoFilter);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamMoeller);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4vIntersectorStreamPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Triangle4iIntersectorStreamPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoeller);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamMoellerNoFilter);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamMoeller);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4vIntersectorStreamPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN, BVH4Quad4iIntersectorStreamPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH4VirtualIntersectorStream);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH4InstanceIntersectorStream);
// SAH scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH4Curve4vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
@ -294,6 +292,9 @@ namespace embree
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH4GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH4GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
@ -312,5 +313,6 @@ namespace embree
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH4BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
};
}

View File

@ -21,6 +21,7 @@
#include "../geometry/subdivpatch1.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"
#include "../common/accelinstance.h"
@ -66,6 +67,9 @@ namespace embree
DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayIntersector1);
DECLARE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayMBIntersector1);
DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
DECLARE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
@ -101,6 +105,9 @@ namespace embree
DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayIntersector4Chunk);
DECLARE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayMBIntersector4Chunk);
DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
@ -135,6 +142,9 @@ namespace embree
DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayIntersector8Chunk);
DECLARE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayMBIntersector8Chunk);
DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
@ -169,27 +179,12 @@ namespace embree
DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayIntersector16Chunk);
DECLARE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayMBIntersector16Chunk);
DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
DECLARE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream);
DECLARE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream);
DECLARE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8OBBCurve8iMBBuilder_OBB,void* COMMA Scene* COMMA size_t);
@ -212,6 +207,9 @@ namespace embree
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DECLARE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4SceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Triangle4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
DECLARE_ISA_FUNCTION(Builder*,BVH8Quad4vSceneBuilderFastSpatialSAH,void* COMMA Scene* COMMA size_t);
@ -224,6 +222,7 @@ namespace embree
DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DECLARE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
BVH8Factory::BVH8Factory(int bfeatures, int ifeatures)
{
@ -257,6 +256,9 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceSceneBuilderSAH));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceMBSceneBuilderSAH));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceArraySceneBuilderSAH));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX(features,BVH8InstanceArrayMBSceneBuilderSAH));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridSceneBuilderSAH));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX(features,BVH8GridMBSceneBuilderSAH));
@ -270,6 +272,7 @@ namespace embree
IF_ENABLED_QUADS (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelQuadMeshSAH));
IF_ENABLED_USER (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelVirtualSAH));
IF_ENABLED_INSTANCE (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelInstanceSAH));
IF_ENABLED_INSTANCE_ARRAY (SELECT_SYMBOL_INIT_AVX(features,BVH8BuilderTwoLevelInstanceArraySAH));
}
void BVH8Factory::selectIntersectors(int features)
@ -313,6 +316,9 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector1));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector1));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayIntersector1));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayMBIntersector1));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector1Moeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridMBIntersector1Moeller))
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector1Pluecker));
@ -351,6 +357,9 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector4Chunk));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector4Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayIntersector4Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayMBIntersector4Chunk));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector4HybridMoeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector4HybridPluecker));
@ -386,6 +395,9 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersector8Chunk));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceMBIntersector8Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayIntersector8Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceArrayMBIntersector8Chunk));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector8HybridMoeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8GridIntersector8HybridPluecker));
@ -421,29 +433,12 @@ namespace embree
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceIntersector16Chunk));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceMBIntersector16Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceArrayIntersector16Chunk));
IF_ENABLED_INSTANCE_ARRAY(SELECT_SYMBOL_INIT_AVX512(features,BVH8InstanceArrayMBIntersector16Chunk));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH8GridIntersector16HybridMoeller));
IF_ENABLED_GRIDS(SELECT_SYMBOL_INIT_AVX512(features,BVH8GridIntersector16HybridPluecker));
/* select stream intersectors */
SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8IntersectorStreamPacketFallback);
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4IntersectorStreamMoeller));
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4IntersectorStreamMoellerNoFilter));
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersectorStreamMoeller));
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4vIntersectorStreamPluecker));
IF_ENABLED_TRIS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Triangle4iIntersectorStreamPluecker));
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamMoeller));
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamMoellerNoFilter));
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersectorStreamMoeller));
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4vIntersectorStreamPluecker));
IF_ENABLED_QUADS(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8Quad4iIntersectorStreamPluecker));
IF_ENABLED_USER(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8VirtualIntersectorStream));
IF_ENABLED_INSTANCE(SELECT_SYMBOL_INIT_AVX_AVX2_AVX512(features,BVH8InstanceIntersectorStream));
#endif
}
@ -460,7 +455,6 @@ namespace embree
intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4Hybrid();
intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8Hybrid();
intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16Hybrid();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -474,7 +468,6 @@ namespace embree
intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4Hybrid();
intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8Hybrid();
intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16Hybrid();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -496,7 +489,6 @@ namespace embree
intersectors.intersector4 = BVH8OBBVirtualCurveIntersector4HybridMB();
intersectors.intersector8 = BVH8OBBVirtualCurveIntersector8HybridMB();
intersectors.intersector16 = BVH8OBBVirtualCurveIntersector16HybridMB();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -510,7 +502,6 @@ namespace embree
intersectors.intersector4 = BVH8OBBVirtualCurveIntersectorRobust4HybridMB();
intersectors.intersector8 = BVH8OBBVirtualCurveIntersectorRobust8HybridMB();
intersectors.intersector16 = BVH8OBBVirtualCurveIntersectorRobust16HybridMB();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -532,8 +523,6 @@ namespace embree
intersectors.intersector8_nofilter = BVH8Triangle4Intersector8HybridMoellerNoFilter();
intersectors.intersector16_filter = BVH8Triangle4Intersector16HybridMoeller();
intersectors.intersector16_nofilter = BVH8Triangle4Intersector16HybridMoellerNoFilter();
intersectors.intersectorN_filter = BVH8Triangle4IntersectorStreamMoeller();
intersectors.intersectorN_nofilter = BVH8Triangle4IntersectorStreamMoellerNoFilter();
#endif
return intersectors;
}
@ -554,7 +543,6 @@ namespace embree
intersectors.intersector4 = BVH8Triangle4vIntersector4HybridPluecker();
intersectors.intersector8 = BVH8Triangle4vIntersector8HybridPluecker();
intersectors.intersector16 = BVH8Triangle4vIntersector16HybridPluecker();
intersectors.intersectorN = BVH8Triangle4vIntersectorStreamPluecker();
#endif
return intersectors;
}
@ -571,7 +559,6 @@ namespace embree
intersectors.intersector4 = BVH8Triangle4iIntersector4HybridMoeller();
intersectors.intersector8 = BVH8Triangle4iIntersector8HybridMoeller();
intersectors.intersector16 = BVH8Triangle4iIntersector16HybridMoeller();
intersectors.intersectorN = BVH8Triangle4iIntersectorStreamMoeller();
#endif
return intersectors;
}
@ -584,7 +571,6 @@ namespace embree
intersectors.intersector4 = BVH8Triangle4iIntersector4HybridPluecker();
intersectors.intersector8 = BVH8Triangle4iIntersector8HybridPluecker();
intersectors.intersector16 = BVH8Triangle4iIntersector16HybridPluecker();
intersectors.intersectorN = BVH8Triangle4iIntersectorStreamPluecker();
#endif
return intersectors;
}
@ -604,7 +590,6 @@ namespace embree
intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridMoeller();
intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridMoeller();
intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridMoeller();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -617,7 +602,6 @@ namespace embree
intersectors.intersector4 = BVH8Triangle4vMBIntersector4HybridPluecker();
intersectors.intersector8 = BVH8Triangle4vMBIntersector8HybridPluecker();
intersectors.intersector16 = BVH8Triangle4vMBIntersector16HybridPluecker();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -637,7 +621,6 @@ namespace embree
intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridMoeller();
intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridMoeller();
intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridMoeller();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -650,7 +633,6 @@ namespace embree
intersectors.intersector4 = BVH8Triangle4iMBIntersector4HybridPluecker();
intersectors.intersector8 = BVH8Triangle4iMBIntersector8HybridPluecker();
intersectors.intersector16 = BVH8Triangle4iMBIntersector16HybridPluecker();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -673,8 +655,6 @@ namespace embree
intersectors.intersector8_nofilter = BVH8Quad4vIntersector8HybridMoellerNoFilter();
intersectors.intersector16_filter = BVH8Quad4vIntersector16HybridMoeller();
intersectors.intersector16_nofilter = BVH8Quad4vIntersector16HybridMoellerNoFilter();
intersectors.intersectorN_filter = BVH8Quad4vIntersectorStreamMoeller();
intersectors.intersectorN_nofilter = BVH8Quad4vIntersectorStreamMoellerNoFilter();
#endif
return intersectors;
}
@ -687,7 +667,6 @@ namespace embree
intersectors.intersector4 = BVH8Quad4vIntersector4HybridPluecker();
intersectors.intersector8 = BVH8Quad4vIntersector8HybridPluecker();
intersectors.intersector16 = BVH8Quad4vIntersector16HybridPluecker();
intersectors.intersectorN = BVH8Quad4vIntersectorStreamPluecker();
#endif
return intersectors;
}
@ -707,7 +686,6 @@ namespace embree
intersectors.intersector4 = BVH8Quad4iIntersector4HybridMoeller();
intersectors.intersector8 = BVH8Quad4iIntersector8HybridMoeller();
intersectors.intersector16 = BVH8Quad4iIntersector16HybridMoeller();
intersectors.intersectorN = BVH8Quad4iIntersectorStreamMoeller();
#endif
return intersectors;
}
@ -720,7 +698,6 @@ namespace embree
intersectors.intersector4 = BVH8Quad4iIntersector4HybridPluecker();
intersectors.intersector8 = BVH8Quad4iIntersector8HybridPluecker();
intersectors.intersector16 = BVH8Quad4iIntersector16HybridPluecker();
intersectors.intersectorN = BVH8Quad4iIntersectorStreamPluecker();
#endif
return intersectors;
}
@ -740,7 +717,6 @@ namespace embree
intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridMoeller();
intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridMoeller();
intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridMoeller();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -753,7 +729,6 @@ namespace embree
intersectors.intersector4 = BVH8Quad4iMBIntersector4HybridPluecker();
intersectors.intersector8 = BVH8Quad4iMBIntersector8HybridPluecker();
intersectors.intersector16 = BVH8Quad4iMBIntersector16HybridPluecker();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -794,7 +769,6 @@ namespace embree
intersectors.intersector4 = BVH8VirtualIntersector4Chunk();
intersectors.intersector8 = BVH8VirtualIntersector8Chunk();
intersectors.intersector16 = BVH8VirtualIntersector16Chunk();
intersectors.intersectorN = BVH8VirtualIntersectorStream();
#endif
intersectors.collider = BVH8ColliderUserGeom();
return intersectors;
@ -809,7 +783,6 @@ namespace embree
intersectors.intersector4 = BVH8VirtualMBIntersector4Chunk();
intersectors.intersector8 = BVH8VirtualMBIntersector8Chunk();
intersectors.intersector16 = BVH8VirtualMBIntersector16Chunk();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
@ -823,7 +796,19 @@ namespace embree
intersectors.intersector4 = BVH8InstanceIntersector4Chunk();
intersectors.intersector8 = BVH8InstanceIntersector8Chunk();
intersectors.intersector16 = BVH8InstanceIntersector16Chunk();
intersectors.intersectorN = BVH8InstanceIntersectorStream();
#endif
return intersectors;
}
Accel::Intersectors BVH8Factory::BVH8InstanceArrayIntersectors(BVH8* bvh)
{
Accel::Intersectors intersectors;
intersectors.ptr = bvh;
intersectors.intersector1 = BVH8InstanceArrayIntersector1();
#if defined (EMBREE_RAY_PACKETS)
intersectors.intersector4 = BVH8InstanceArrayIntersector4Chunk();
intersectors.intersector8 = BVH8InstanceArrayIntersector8Chunk();
intersectors.intersector16 = BVH8InstanceArrayIntersector16Chunk();
#endif
return intersectors;
}
@ -837,7 +822,19 @@ namespace embree
intersectors.intersector4 = BVH8InstanceMBIntersector4Chunk();
intersectors.intersector8 = BVH8InstanceMBIntersector8Chunk();
intersectors.intersector16 = BVH8InstanceMBIntersector16Chunk();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
return intersectors;
}
Accel::Intersectors BVH8Factory::BVH8InstanceArrayMBIntersectors(BVH8* bvh)
{
Accel::Intersectors intersectors;
intersectors.ptr = bvh;
intersectors.intersector1 = BVH8InstanceArrayMBIntersector1();
#if defined (EMBREE_RAY_PACKETS)
intersectors.intersector4 = BVH8InstanceArrayMBIntersector4Chunk();
intersectors.intersector8 = BVH8InstanceArrayMBIntersector8Chunk();
intersectors.intersector16 = BVH8InstanceArrayMBIntersector16Chunk();
#endif
return intersectors;
}
@ -1086,6 +1083,28 @@ namespace embree
return new AccelInstance(accel,builder,intersectors);
}
Accel* BVH8Factory::BVH8InstanceArray(Scene* scene, BuildVariant bvariant)
{
BVH8* accel = new BVH8(InstanceArrayPrimitive::type,scene);
Accel::Intersectors intersectors = BVH8InstanceArrayIntersectors(accel);
auto gtype = Geometry::MTY_INSTANCE_ARRAY;
// Builder* builder = BVH8InstanceSceneBuilderSAH(accel,scene,gtype);
Builder* builder = nullptr;
if (scene->device->object_builder == "default") {
switch (bvariant) {
case BuildVariant::STATIC : builder = BVH8InstanceArraySceneBuilderSAH(accel,scene,gtype); break;
case BuildVariant::DYNAMIC : builder = BVH8BuilderTwoLevelInstanceArraySAH(accel,scene,gtype,false); break;
case BuildVariant::HIGH_QUALITY: assert(false); break;
}
}
else if (scene->device->object_builder == "sah") builder = BVH8InstanceArraySceneBuilderSAH(accel,scene,gtype);
else if (scene->device->object_builder == "dynamic") builder = BVH8BuilderTwoLevelInstanceArraySAH(accel,scene,gtype,false);
else throw_RTCError(RTC_ERROR_INVALID_ARGUMENT,"unknown builder "+scene->device->object_builder+" for BVH8<Object>");
return new AccelInstance(accel,builder,intersectors);
}
Accel* BVH8Factory::BVH8InstanceMB(Scene* scene, bool isExpensive)
{
BVH8* accel = new BVH8(InstancePrimitive::type,scene);
@ -1095,6 +1114,15 @@ namespace embree
return new AccelInstance(accel,builder,intersectors);
}
Accel* BVH8Factory::BVH8InstanceArrayMB(Scene* scene)
{
BVH8* accel = new BVH8(InstanceArrayPrimitive::type,scene);
Accel::Intersectors intersectors = BVH8InstanceArrayMBIntersectors(accel);
auto gtype = Geometry::MTY_INSTANCE_ARRAY;
Builder* builder = BVH8InstanceArrayMBSceneBuilderSAH(accel,scene,gtype);
return new AccelInstance(accel,builder,intersectors);
}
Accel::Intersectors BVH8Factory::BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant)
{
Accel::Intersectors intersectors;
@ -1106,7 +1134,6 @@ namespace embree
intersectors.intersector4 = BVH8GridIntersector4HybridMoeller();
intersectors.intersector8 = BVH8GridIntersector8HybridMoeller();
intersectors.intersector16 = BVH8GridIntersector16HybridMoeller();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
}
else /* if (ivariant == IntersectVariant::ROBUST) */
@ -1116,7 +1143,6 @@ namespace embree
intersectors.intersector4 = BVH8GridIntersector4HybridPluecker();
intersectors.intersector8 = BVH8GridIntersector8HybridPluecker();
intersectors.intersector16 = BVH8GridIntersector16HybridPluecker();
intersectors.intersectorN = BVH8IntersectorStreamPacketFallback();
#endif
}
return intersectors;
@ -1131,7 +1157,6 @@ namespace embree
intersectors.intersector4 = nullptr;
intersectors.intersector8 = nullptr;
intersectors.intersector16 = nullptr;
intersectors.intersectorN = nullptr;
#endif
return intersectors;
}

View File

@ -39,6 +39,9 @@ namespace embree
Accel* BVH8Instance(Scene* scene, bool isExpensive, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH8InstanceMB(Scene* scene, bool isExpensive);
Accel* BVH8InstanceArray(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC);
Accel* BVH8InstanceArrayMB(Scene* scene);
Accel* BVH8Grid(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
Accel* BVH8GridMB(Scene* scene, BuildVariant bvariant = BuildVariant::STATIC, IntersectVariant ivariant = IntersectVariant::FAST);
@ -70,6 +73,9 @@ namespace embree
Accel::Intersectors BVH8InstanceIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceMBIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceArrayIntersectors(BVH8* bvh);
Accel::Intersectors BVH8InstanceArrayMBIntersectors(BVH8* bvh);
Accel::Intersectors BVH8GridIntersectors(BVH8* bvh, IntersectVariant ivariant);
Accel::Intersectors BVH8GridMBIntersectors(BVH8* bvh, IntersectVariant ivariant);
@ -111,6 +117,9 @@ namespace embree
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8InstanceArrayMBIntersector1);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridMBIntersector1Moeller);
DEFINE_SYMBOL2(Accel::Intersector1,BVH8GridIntersector1Pluecker);
@ -146,6 +155,9 @@ namespace embree
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8InstanceArrayMBIntersector4Chunk);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector4,BVH8GridIntersector4HybridPluecker);
@ -180,6 +192,9 @@ namespace embree
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8InstanceArrayMBIntersector8Chunk);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector8,BVH8GridIntersector8HybridPluecker);
@ -214,27 +229,12 @@ namespace embree
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8InstanceArrayMBIntersector16Chunk);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridMoeller);
DEFINE_SYMBOL2(Accel::Intersector16,BVH8GridIntersector16HybridPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8IntersectorStreamPacketFallback);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoeller);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4IntersectorStreamMoellerNoFilter);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamMoeller);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4vIntersectorStreamPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Triangle4iIntersectorStreamPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoeller);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamMoellerNoFilter);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamMoeller);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4vIntersectorStreamPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8Quad4iIntersectorStreamPluecker);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8VirtualIntersectorStream);
DEFINE_SYMBOL2(Accel::IntersectorN,BVH8InstanceIntersectorStream);
// SAH scene builders
private:
DEFINE_ISA_FUNCTION(Builder*,BVH8Curve8vBuilder_OBB_New,void* COMMA Scene* COMMA size_t);
@ -259,6 +259,9 @@ namespace embree
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArraySceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8InstanceArrayMBSceneBuilderSAH,void* COMMA Scene* COMMA Geometry::GTypeMask);
DEFINE_ISA_FUNCTION(Builder*,BVH8GridSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
DEFINE_ISA_FUNCTION(Builder*,BVH8GridMBSceneBuilderSAH,void* COMMA Scene* COMMA size_t);
@ -276,5 +279,6 @@ namespace embree
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelQuadMeshSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelVirtualSAH,void* COMMA Scene* COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceSAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
DEFINE_ISA_FUNCTION(Builder*,BVH8BuilderTwoLevelInstanceArraySAH,void* COMMA Scene* COMMA Geometry::GTypeMask COMMA bool);
};
}

View File

@ -17,6 +17,7 @@
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#if defined(__64BIT__)
# define ROTATE_TREE 1 // specifies number of tree rotation rounds to perform
@ -399,6 +400,50 @@ namespace embree
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
template<int N>
struct CreateMortonLeaf<N,InstanceArrayPrimitive>
{
typedef BVHN<N> BVH;
typedef typename BVH::NodeRef NodeRef;
typedef typename BVH::NodeRecord NodeRecord;
__forceinline CreateMortonLeaf (InstanceArray* mesh, unsigned int geomID, BVHBuilderMorton::BuildPrim* morton)
: mesh(mesh), morton(morton), geomID_(geomID) {}
__noinline NodeRecord operator() (const range<unsigned>& current, const FastAllocator::CachedAllocator& alloc)
{
vfloat4 lower(pos_inf);
vfloat4 upper(neg_inf);
size_t items = current.size();
size_t start = current.begin();
assert(items <= 1);
/* allocate leaf node */
InstanceArrayPrimitive* accel = (InstanceArrayPrimitive*) alloc.malloc1(items*sizeof(InstanceArrayPrimitive),BVH::byteAlignment);
NodeRef ref = BVH::encodeLeaf((char*)accel,items);
const InstanceArray* instance = this->mesh;
BBox3fa bounds = empty;
for (size_t i=0; i<items; i++)
{
const unsigned int primID = morton[start+i].index;
bounds.extend(instance->bounds(primID));
new (&accel[i]) InstanceArrayPrimitive(geomID_, primID);
}
BBox3fx box_o = (BBox3fx&)bounds;
#if ROTATE_TREE
if (N == 4)
box_o.lower.a = current.size();
#endif
return NodeRecord(ref,box_o);
}
private:
InstanceArray* mesh;
BVHBuilderMorton::BuildPrim* morton;
unsigned int geomID_ = std::numeric_limits<unsigned int>::max();
};
template<typename Mesh>
struct CalculateMeshBounds
{
@ -525,6 +570,13 @@ namespace embree
#if defined(__AVX__)
Builder* BVH8InstanceMeshBuilderMortonGeneral (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,Instance,InstancePrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#if defined(__AVX__)
Builder* BVH8InstanceArrayMeshBuilderMortonGeneral (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) { return new class BVHNMeshBuilderMorton<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,mesh,gtype,geomID,1,BVH4::maxLeafBlocks); }
#endif
#endif
}

View File

@ -15,6 +15,7 @@
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"
#include "../common/state.h"
@ -518,14 +519,35 @@ namespace embree
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE)
Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
Builder* BVH4InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,InstancePrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,inf,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
Builder* BVH8InstanceSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceMeshBuilderSAH (void* bvh, Instance* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,inf,gtype);
return new BVHNBuilderSAH<8,InstancePrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<4,InstanceArrayPrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype);
}
Builder* BVH4InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<4,InstanceArrayPrimitive>((BVH4*)bvh,mesh,geomID,4,1.0f,1,1,gtype);
}
#if defined(__AVX__)
Builder* BVH8InstanceArraySceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) {
return new BVHNBuilderSAH<8,InstanceArrayPrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype);
}
Builder* BVH8InstanceArrayMeshBuilderSAH (void* bvh, InstanceArray* mesh, Geometry::GTypeMask gtype, unsigned int geomID, size_t mode) {
return new BVHNBuilderSAH<8,InstanceArrayPrimitive>((BVH8*)bvh,mesh,geomID,8,1.0f,1,1,gtype);
}
#endif
#endif

View File

@ -17,6 +17,7 @@
#include "../geometry/quadi.h"
#include "../geometry/object.h"
#include "../geometry/instance.h"
#include "../geometry/instance_array.h"
#include "../geometry/subgrid.h"
#include "../common/state.h"
@ -695,6 +696,13 @@ namespace embree
#endif
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,scene,4,1.0f,1,1,gtype); }
#if defined(__AVX__)
Builder* BVH8InstanceArrayMBSceneBuilderSAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype) { return new BVHNBuilderMBlurSAH<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,scene,8,1.0f,1,1,gtype); }
#endif
#endif
#if defined(EMBREE_GEOMETRY_GRID)
Builder* BVH4GridMBSceneBuilderSAH (void* bvh, Scene* scene, size_t mode) { return new BVHNBuilderMBlurSAHGrid<4>((BVH4*)bvh,scene,4,1.0f,4,4); }
#if defined(__AVX__)

View File

@ -1,6 +1,10 @@
// Copyright 2009-2021 Intel Corporation
// SPDX-License-Identifier: Apache-2.0
#if !defined(_CRT_SECURE_NO_WARNINGS)
#define _CRT_SECURE_NO_WARNINGS
#endif
#include "bvh_builder_twolevel.h"
#include "bvh_statistics.h"
#include "../builders/bvh_builder_sah.h"
@ -333,6 +337,12 @@ namespace embree
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH4BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<4,InstanceArray,InstanceArrayPrimitive>((BVH4*)bvh,scene,gtype,useMortonBuilder);
}
#endif
#if defined(__AVX__)
#if defined(EMBREE_GEOMETRY_TRIANGLE)
Builder* BVH8BuilderTwoLevelTriangle4MeshSAH (void* bvh, Scene* scene, bool useMortonBuilder) {
@ -364,6 +374,12 @@ namespace embree
}
#endif
#if defined(EMBREE_GEOMETRY_INSTANCE_ARRAY)
Builder* BVH8BuilderTwoLevelInstanceArraySAH (void* bvh, Scene* scene, Geometry::GTypeMask gtype, bool useMortonBuilder) {
return new BVHNBuilderTwoLevel<8,InstanceArray,InstanceArrayPrimitive>((BVH8*)bvh,scene,gtype,useMortonBuilder);
}
#endif
#endif
}
}

View File

@ -7,7 +7,6 @@
#include "bvh_builder_twolevel_internal.h"
#include "bvh.h"
#include "../common/primref.h"
#include "../builders/priminfo.h"
#include "../builders/primrefgen.h"

Some files were not shown because too many files have changed in this diff Show More