diff --git a/.vs/simulator/v14/.suo b/.vs/simulator/v14/.suo
new file mode 100644
index 0000000..3ce124d
Binary files /dev/null and b/.vs/simulator/v14/.suo differ
diff --git a/Cloth.h b/Cloth.h
new file mode 100644
index 0000000..a4f60a1
--- /dev/null
+++ b/Cloth.h
@@ -0,0 +1,15 @@
+#pragma once
+
+#include "VAOMesh.h"
+
+enum cloth_type { SINGLE_LAYER_NOB, SINGLE_LAYER_BOUNDARY }; // cloth variants; NOB presumably means "no boundary" -- TODO confirm
+// A VAOMesh tagged with the cloth variant it represents.
+class Cloth : public VAOMesh {
+public:
+	Cloth(cloth_type type = SINGLE_LAYER_BOUNDARY) : _type(type) {}  // SINGLE_LAYER_BOUNDARY unless told otherwise
+	cloth_type get_obj_type() const { return _type; }                // variant chosen at construction
+private:
+	cloth_type _type;
+};
+
+
diff --git a/Common.h b/Common.h
new file mode 100644
index 0000000..cb7c615
--- /dev/null
+++ b/Common.h
@@ -0,0 +1,20 @@
+#pragma once
+
+#include <vector>
+
+#include <glm\glm.hpp>
+
+typedef char sint8;         // NOTE: plain char, whose signedness is implementation-defined
+typedef short sint16;
+typedef int sint32;
+typedef long long sint64;
+// Unsigned counterparts. These used to alias the signed types above, so "uint32" was really int.
+typedef unsigned char      uint8;
+typedef unsigned short     uint16;
+typedef unsigned int       uint32;
+typedef unsigned long long uint64;
+
+using Vec4s = std::vector<glm::vec4>;  // list of 4-component float vectors
+using Vec3s = std::vector<glm::vec3>;  // list of 3-component float vectors
+using Vec2s = std::vector<glm::vec2>;  // list of 2-component float vectors
+
diff --git a/GLSLShader.cpp b/GLSLShader.cpp
new file mode 100644
index 0000000..208ed47
--- /dev/null
+++ b/GLSLShader.cpp
@@ -0,0 +1,119 @@
+//A simple class for handling GLSL shader compilation
+//Author: Movania Muhammad Mobeen
+//Last Modified: February 2, 2011
+#include "GLSLShader.h"
+#include <iostream>
+
+// Starts with no program and no loaded shaders.
+GLSLShader::GLSLShader(void) : _program(0), _totalShaders(0)
+{
+	// _program used to be left uninitialised, so Use()/DeleteProgram() called before
+	// CreateAndLinkProgram() passed an indeterminate id to OpenGL. 0 is GL's "no program".
+	_shaders[VERTEX_SHADER] = 0;
+	_shaders[FRAGMENT_SHADER] = 0;
+	_shaders[GEOMETRY_SHADER] = 0;
+}
+
+// Drops the cached attribute/uniform locations; the GL program itself is not deleted here.
+GLSLShader::~GLSLShader(void) {
+	_uniformLocationList.clear();
+	_attributeList.clear();
+}
+
+// Compiles `source` as a shader of GL type `type` and queues it for CreateAndLinkProgram(); errors go to stderr.
+void GLSLShader::LoadFromString(GLenum type, const string& source) {
+	if (_totalShaders >= 3) {	// _shaders has only three slots; a fourth write would overflow it
+		cerr<<"Compile log: shader ignored, three shaders are already loaded"<<endl;
+		return;
+	}
+	unsigned  int shader = glCreateShader (type);
+	const char * ptmp = source.c_str();
+	glShaderSource (shader, 1, &ptmp, NULL);
+	glCompileShader (shader);
+	GLint status = GL_FALSE, infoLogLength = 0;
+	glGetShaderiv (shader, GL_COMPILE_STATUS, &status);
+	if (status == GL_FALSE) {
+		glGetShaderiv (shader, GL_INFO_LOG_LENGTH, &infoLogLength);
+		string infoLog(infoLogLength > 0 ? infoLogLength : 1, '\0');	// never empty, so always NUL-terminated
+		glGetShaderInfoLog (shader, (GLsizei)infoLog.size(), NULL, &infoLog[0]);
+		cerr<<"Compile log: "<<infoLog.c_str()<<endl;
+	}
+	_shaders[_totalShaders++]=shader;
+}
+
+
+// Creates the program object, attaches every loaded shader, links, and reports link errors on stderr.
+void GLSLShader::CreateAndLinkProgram() {
+	_program = glCreateProgram ();
+	for (int i = 0; i < 3; ++i) {
+		if (_shaders[i] != 0) {
+			glAttachShader (_program, _shaders[i]);
+		}
+	}
+	//link and check whether the program links fine
+	GLint status = GL_FALSE;
+	glLinkProgram (_program);
+	glGetProgramiv (_program, GL_LINK_STATUS, &status);
+	if (status == GL_FALSE) {
+		GLint infoLogLength = 0;
+		glGetProgramiv (_program, GL_INFO_LOG_LENGTH, &infoLogLength);
+		// Keep at least one NUL byte so an empty log still prints as a valid C string.
+		string infoLog(infoLogLength > 0 ? infoLogLength : 1, '\0');
+		glGetProgramInfoLog (_program, (GLsizei)infoLog.size(), NULL, &infoLog[0]);
+		cerr<<"Link log: "<<infoLog.c_str()<<endl;
+	}
+
+	// The shader objects are not needed once the program is linked. Clear the slots and
+	// the counter as well: otherwise loading shaders for a second program would write
+	// past _shaders[2] and re-attach ids that have already been deleted.
+	for (int i = 0; i < 3; ++i) {
+		glDeleteShader(_shaders[i]);
+		_shaders[i] = 0;
+	}
+	_totalShaders = 0;
+}
+
+// Installs this program with glUseProgram().
+void GLSLShader::Use()
+{ glUseProgram(_program); }
+
+// Selects program 0, i.e. no program.
+void GLSLShader::UnUse()
+{ glUseProgram(0); }
+
+// Queries the location of `attribute` in _program and caches it under that name.
+void GLSLShader::AddAttribute(const string& attribute)
+{ _attributeList[attribute] = glGetAttribLocation(_program, attribute.c_str()); }
+
+// Indexer returning the cached location of `attribute`. Note that std::map::operator[]
+// default-inserts 0 for a name that was never passed to AddAttribute().
+unsigned  int GLSLShader::operator [](const string& attribute)
+{ return _attributeList[attribute]; }
+
+// Queries the location of `uniform` in _program and caches it under that name.
+void GLSLShader::AddUniform(const string& uniform)
+{ _uniformLocationList[uniform] = glGetUniformLocation(_program, uniform.c_str()); }
+
+// Returns the cached location of `uniform` (0 is default-inserted for names never added).
+unsigned  int GLSLShader::operator()(const string& uniform)
+{ return _uniformLocationList[uniform]; }
+// Exposes the raw GL program id.
+unsigned  int GLSLShader::GetProgram() const
+{ return _program; }
+#include <fstream>
+// Reads the whole file `filename` and compiles it as a `whichShader` shader via LoadFromString().
+// If the file cannot be opened nothing is compiled and an error line is written to stderr.
+void GLSLShader::LoadFromFile(GLenum whichShader, const string& filename){
+	ifstream shaderFile(filename.c_str(), ios_base::in);
+	if(!shaderFile) {
+		cerr<<"Error loading shader: "<<filename<<endl;
+		return;
+	}
+
+	// Slurp the stream in one go: a default-constructed istreambuf_iterator
+	// serves as the end-of-stream marker.
+	std::istreambuf_iterator<char> first(shaderFile);
+	std::istreambuf_iterator<char> last;
+	const string contents(first, last);
+	LoadFromString(whichShader, contents);
+}
diff --git a/GLSLShader.h b/GLSLShader.h
new file mode 100644
index 0000000..1d53156
--- /dev/null
+++ b/GLSLShader.h
@@ -0,0 +1,41 @@
+//A simple class for handling GLSL shader compilation
+//Author: Movania Muhammad Mobeen
+#pragma once
+//#define GLEW_STATIC
+#include <map>
+#include <string>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+#include <glm/glm.hpp>
+
+
+using namespace std;
+
+
+class GLSLShader // wraps one GL program plus the shader objects loaded for it
+{
+public:
+	GLSLShader(void);
+	~GLSLShader(void);
+	void LoadFromString(GLenum whichShader, const string& source); // compile a shader from source text
+	void LoadFromFile(GLenum whichShader, const string& filename); // read a file, then LoadFromString()
+	void CreateAndLinkProgram(); // attach the loaded shaders and link _program
+	void Use(); // glUseProgram(_program)
+	void UnUse(); // glUseProgram(0)
+	void AddAttribute(const string& attribute); // cache glGetAttribLocation() under this name
+	void AddUniform(const string& uniform); // cache glGetUniformLocation() under this name
+	unsigned int GetProgram() const;
+	//Indexers returning the cached location of an attribute ([]) or a uniform (())
+	unsigned  int operator[](const string& attribute);
+	unsigned  int operator()(const string& uniform);
+	//Program deletion
+	void DeleteProgram() { glDeleteProgram(_program); _program = -1; } // -1 wraps to the largest unsigned value
+private:
+	enum ShaderType { VERTEX_SHADER, FRAGMENT_SHADER, GEOMETRY_SHADER };
+	unsigned  int	_program;
+	int _totalShaders; // number of shaders loaded so far, i.e. the next free slot in _shaders
+	unsigned  int _shaders[3];//NOTE: LoadFromString() fills these in load order, not by shader type
+	map<string, unsigned  int> _attributeList;
+	map<string, unsigned  int> _uniformLocationList;
+};
diff --git a/KinectJointFilter.cpp b/KinectJointFilter.cpp
new file mode 100644
index 0000000..8b0f225
--- /dev/null
+++ b/KinectJointFilter.cpp
@@ -0,0 +1,189 @@
+//--------------------------------------------------------------------------------------
+// KinectJointFilter.cpp
+//
+// This file contains Holt Double Exponential Smoothing filter for filtering Joints
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//--------------------------------------------------------------------------------------
+
+//#include "stdafx.h"
+#include "KinectJointFilter.h"
+
+using namespace Sample;
+using namespace DirectX;
+
+//-------------------------------------------------------------------------------------
+// Name: Lerp()
+// Desc: Linear interpolation between two floats
+//-------------------------------------------------------------------------------------
+inline FLOAT Lerp( FLOAT f1, FLOAT f2, FLOAT fBlend ) {
+	const FLOAT fSpan = f2 - f1;  // signed distance covered as fBlend goes from 0 to 1
+	return f1 + fSpan * fBlend;
+}
+
+//--------------------------------------------------------------------------------------
+// if joint is 0 it is not valid.
+//--------------------------------------------------------------------------------------
+inline BOOL JointPositionIsValid( XMVECTOR vJointPosition ) {
+	// An all-zero position means "no data"; any non-zero component makes the joint valid.
+	const bool bAtOrigin = XMVectorGetX( vJointPosition ) == 0.0f &&
+		XMVectorGetY( vJointPosition ) == 0.0f && XMVectorGetZ( vJointPosition ) == 0.0f;
+	return !bAtOrigin;
+}
+
+//--------------------------------------------------------------------------------------
+// Implementation of a Holt Double Exponential Smoothing filter. The double exponential
+// smooths the curve and predicts.  There is also noise jitter removal. And maximum
+// prediction bounds.  The paramaters are commented in the init function.
+//--------------------------------------------------------------------------------------
+// Reads the current joints from pBody and runs each of them through the filter.
+// If the joints cannot be read, the filter state is left exactly as it was.
+void FilterDoubleExponential::Update( IBody *const pBody )
+{
+	assert( pBody );
+
+	// GetJoints() has to be told how many entries the array holds. The old code passed 0,
+	// which does not describe the array, and ignored the result, so uninitialised joints were filtered.
+	Joint joints[JointType_Count] = {};
+	if( FAILED( pBody->GetJoints( JointType_Count, joints ) ) )
+		return;
+
+	// Check for divide by zero. Use an epsilon of a 10th of a millimeter
+	m_fJitterRadius = XMMax( 0.0001f, m_fJitterRadius );
+
+	for( INT i = 0; i < JointType_Count; i++ )
+	{
+		TRANSFORM_SMOOTH_PARAMETERS SmoothingParams;
+		SmoothingParams.fSmoothing = m_fSmoothing;
+		SmoothingParams.fCorrection = m_fCorrection;
+		SmoothingParams.fPrediction = m_fPrediction;
+		SmoothingParams.fJitterRadius = m_fJitterRadius;
+		SmoothingParams.fMaxDeviationRadius = m_fMaxDeviationRadius;
+		// If inferred, we smooth a bit more by using a bigger jitter radius
+		if( joints[i].TrackingState == TrackingState::TrackingState_Inferred )
+		{
+			SmoothingParams.fJitterRadius *= 2.0f;
+			SmoothingParams.fMaxDeviationRadius *= 2.0f;
+		}
+		Update( joints, i, SmoothingParams );
+	}
+}
+
+// Filters all JointType_Count entries of `joints`, one per-joint Update() call each.
+void FilterDoubleExponential::Update( Joint joints[] )
+{
+	// Check for divide by zero. Use an epsilon of a 10th of a millimeter
+	m_fJitterRadius = XMMax( 0.0001f, m_fJitterRadius );
+
+	// Settings shared by every joint; each joint works on its own copy.
+	TRANSFORM_SMOOTH_PARAMETERS baseParams;
+	baseParams.fSmoothing = m_fSmoothing;
+	baseParams.fCorrection = m_fCorrection;
+	baseParams.fPrediction = m_fPrediction;
+	baseParams.fJitterRadius = m_fJitterRadius;
+	baseParams.fMaxDeviationRadius = m_fMaxDeviationRadius;
+	for( INT jointIndex = 0; jointIndex < JointType_Count; ++jointIndex )
+	{
+		TRANSFORM_SMOOTH_PARAMETERS jointParams = baseParams;
+		const bool bInferred = ( joints[jointIndex].TrackingState == TrackingState::TrackingState_Inferred );
+		if( bInferred )
+		{
+			// If inferred, we smooth a bit more by using bigger radii
+			jointParams.fJitterRadius *= 2.0f;
+			jointParams.fMaxDeviationRadius *= 2.0f;
+		}
+		Update( joints, jointIndex, jointParams );
+	}
+}
+
+void FilterDoubleExponential::Update( Joint joints[], UINT JointID, TRANSFORM_SMOOTH_PARAMETERS smoothingParams )
+{	// Filters joints[JointID] with the given parameters; updates m_pHistory[JointID] and writes m_pFilteredJoints[JointID]
+	XMVECTOR vPrevRawPosition;
+	XMVECTOR vPrevFilteredPosition;
+	XMVECTOR vPrevTrend;
+	XMVECTOR vRawPosition;
+	XMVECTOR vFilteredPosition;
+	XMVECTOR vPredictedPosition;
+	XMVECTOR vDiff;
+	XMVECTOR vTrend;
+	XMVECTOR vLength;
+	FLOAT fDiff;
+	BOOL bJointIsValid;
+
+	const Joint joint = joints[JointID];
+
+	vRawPosition = XMVectorSet( joint.Position.X, joint.Position.Y, joint.Position.Z, 0.0f );
+	vPrevFilteredPosition = m_pHistory[JointID].m_vFilteredPosition;
+	vPrevTrend = m_pHistory[JointID].m_vTrend;
+	vPrevRawPosition = m_pHistory[JointID].m_vRawPosition;
+	bJointIsValid = JointPositionIsValid( vRawPosition );
+
+	// If joint is invalid (all-zero position), reset the filter so the start-up branch below runs
+	if( !bJointIsValid )
+	{
+		m_pHistory[JointID].m_dwFrameCount = 0;
+	}
+
+	// Initial start values
+	if( m_pHistory[JointID].m_dwFrameCount == 0 )
+	{
+		// First frame: take the raw position as is, with no trend yet
+		vFilteredPosition = vRawPosition;
+		vTrend = XMVectorZero();
+		m_pHistory[JointID].m_dwFrameCount++;
+	}
+	else if( m_pHistory[JointID].m_dwFrameCount == 1 )
+	{
+		// Second frame: average the two raw positions and seed the trend from the change
+		vFilteredPosition = XMVectorScale( XMVectorAdd( vRawPosition, vPrevRawPosition ), 0.5f );
+		vDiff = XMVectorSubtract( vFilteredPosition, vPrevFilteredPosition );
+		vTrend = XMVectorAdd( XMVectorScale( vDiff, smoothingParams.fCorrection ), XMVectorScale( vPrevTrend, 1.0f - smoothingParams.fCorrection ) );
+		m_pHistory[JointID].m_dwFrameCount++;
+	}
+	else
+	{
+		// First apply jitter filter
+		vDiff = XMVectorSubtract( vRawPosition, vPrevFilteredPosition );
+		vLength = XMVector3Length( vDiff );
+		fDiff = fabs( XMVectorGetX( vLength ) );
+
+		if( fDiff <= smoothingParams.fJitterRadius )
+		{
+			// Inside the jitter radius: blend raw and previous filtered position, weighted by fDiff / radius
+			vFilteredPosition = XMVectorAdd( XMVectorScale( vRawPosition, fDiff / smoothingParams.fJitterRadius ),
+				XMVectorScale( vPrevFilteredPosition, 1.0f - fDiff / smoothingParams.fJitterRadius ) );
+		}
+		else
+		{
+			vFilteredPosition = vRawPosition;
+		}
+
+		// Now the double exponential smoothing filter
+		vFilteredPosition = XMVectorAdd( XMVectorScale( vFilteredPosition, 1.0f - smoothingParams.fSmoothing ),
+			XMVectorScale( XMVectorAdd( vPrevFilteredPosition, vPrevTrend ), smoothingParams.fSmoothing ) );
+
+
+		vDiff = XMVectorSubtract( vFilteredPosition, vPrevFilteredPosition );
+		vTrend = XMVectorAdd( XMVectorScale( vDiff, smoothingParams.fCorrection ), XMVectorScale( vPrevTrend, 1.0f - smoothingParams.fCorrection ) );
+	}
+
+	// Predict into the future to reduce latency
+	vPredictedPosition = XMVectorAdd( vFilteredPosition, XMVectorScale( vTrend, smoothingParams.fPrediction ) );
+
+	// Check that we are not too far away from raw data
+	vDiff = XMVectorSubtract( vPredictedPosition, vRawPosition );
+	vLength = XMVector3Length( vDiff );
+	fDiff = fabs( XMVectorGetX( vLength ) );
+
+	if( fDiff > smoothingParams.fMaxDeviationRadius )
+	{
+		// Pull the prediction back so that it lies fMaxDeviationRadius away from the raw position
+		vPredictedPosition = XMVectorAdd( XMVectorScale( vPredictedPosition, smoothingParams.fMaxDeviationRadius / fDiff ),
+			XMVectorScale( vRawPosition, 1.0f - smoothingParams.fMaxDeviationRadius / fDiff ) );
+	}
+
+	// Save the data from this frame (the filtered position is stored, not the predicted one)
+	m_pHistory[JointID].m_vRawPosition = vRawPosition;
+	m_pHistory[JointID].m_vFilteredPosition = vFilteredPosition;
+	m_pHistory[JointID].m_vTrend = vTrend;
+
+	// Output the predicted position with w forced to 1
+	m_pFilteredJoints[JointID] = vPredictedPosition;
+	m_pFilteredJoints[JointID] = XMVectorSetW( m_pFilteredJoints[JointID], 1.0f );
+}
\ No newline at end of file
diff --git a/KinectJointFilter.h b/KinectJointFilter.h
new file mode 100644
index 0000000..03e0752
--- /dev/null
+++ b/KinectJointFilter.h
@@ -0,0 +1,83 @@
+//--------------------------------------------------------------------------------------
+// KinectJointFilter.h
+//
+// This file contains Holt Double Exponential Smoothing filter for filtering Joints
+//
+// Copyright (C) Microsoft Corporation. All rights reserved.
+//--------------------------------------------------------------------------------------
+
+#pragma once
+
+#include <Windows.h>
+#include <Kinect.h>
+#include <DirectXMath.h>
+#include <queue>
+
+namespace Sample
+{
+	typedef struct _TRANSFORM_SMOOTH_PARAMETERS // tuning values handed to the per-joint filter step
+	{
+		FLOAT   fSmoothing;             // [0..1], lower values closer to raw data
+		FLOAT   fCorrection;            // [0..1], lower values slower to correct towards the raw data
+		FLOAT   fPrediction;            // [0..n], the number of frames to predict into the future
+		FLOAT   fJitterRadius;          // The radius in meters for jitter reduction
+		FLOAT   fMaxDeviationRadius;    // The maximum radius in meters that filtered positions are allowed to deviate from raw data
+	} TRANSFORM_SMOOTH_PARAMETERS;
+
+	// Per-joint state the Holt double exponential smoothing filter carries from one update to the next
+	class FilterDoubleExponentialData
+	{
+		public:
+		DirectX::XMVECTOR m_vRawPosition;       // raw position saved by the previous update
+		DirectX::XMVECTOR m_vFilteredPosition;  // filtered (pre-prediction) position saved by the previous update
+		DirectX::XMVECTOR m_vTrend;             // trend vector saved by the previous update
+		DWORD    m_dwFrameCount;                // 0 and 1 select the start-up branches; reset to 0 when a joint is invalid
+	};
+
+	class FilterDoubleExponential // smooths JointType_Count joints, keeping one history record per joint
+	{
+		public:
+		FilterDoubleExponential() { Init(); }
+		~FilterDoubleExponential() { Shutdown(); }
+
+		void Init( FLOAT fSmoothing = 0.25f, FLOAT fCorrection = 0.25f, FLOAT fPrediction = 0.25f, FLOAT fJitterRadius = 0.03f, FLOAT fMaxDeviationRadius = 0.05f )
+		{	// simply forwards to Reset()
+			Reset( fSmoothing, fCorrection, fPrediction, fJitterRadius, fMaxDeviationRadius );
+		}
+
+		void Shutdown()
+		{	// nothing to release
+		}
+
+		void Reset( FLOAT fSmoothing = 0.25f, FLOAT fCorrection = 0.25f, FLOAT fPrediction = 0.25f, FLOAT fJitterRadius = 0.03f, FLOAT fMaxDeviationRadius = 0.05f )
+		{	// stores the parameters and zeroes all filtered joints and history
+			assert( m_pFilteredJoints );
+			assert( m_pHistory );
+
+			m_fMaxDeviationRadius = fMaxDeviationRadius; // Size of the max prediction radius. Can snap back to noisy data when too high
+			m_fSmoothing = fSmoothing;                   // How much smoothing will occur.  Will lag when too high
+			m_fCorrection = fCorrection;                 // How much to correct back from prediction.  Can make things springy
+			m_fPrediction = fPrediction;                 // Amount of prediction into the future to use. Can overshoot when too high
+			m_fJitterRadius = fJitterRadius;             // Size of the radius where jitter is removed. Can do too much smoothing when too high
+
+			memset( m_pFilteredJoints, 0, sizeof( DirectX::XMVECTOR ) * JointType_Count );
+			memset( m_pHistory, 0, sizeof( FilterDoubleExponentialData ) * JointType_Count );
+		}
+
+		void Update( IBody *const pBody ); // filter the joints read from a body
+		void Update( Joint joints[] );     // filter an array of JointType_Count joints
+
+		inline const DirectX::XMVECTOR *GetFilteredJoints() const { return &m_pFilteredJoints[0]; } // JointType_Count entries
+
+		private:
+		DirectX::XMVECTOR m_pFilteredJoints[JointType_Count];    // output of the last Update(), w set to 1
+		FilterDoubleExponentialData m_pHistory[JointType_Count]; // per-joint state from the previous update
+		FLOAT m_fSmoothing;
+		FLOAT m_fCorrection;
+		FLOAT m_fPrediction;
+		FLOAT m_fJitterRadius;
+		FLOAT m_fMaxDeviationRadius;
+
+		void Update( Joint joints[], UINT JointID, TRANSFORM_SMOOTH_PARAMETERS smoothingParams ); // single-joint filter step
+	};
+}
\ No newline at end of file
diff --git a/Mesh.cpp b/Mesh.cpp
new file mode 100644
index 0000000..c07cb13
--- /dev/null
+++ b/Mesh.cpp
@@ -0,0 +1,150 @@
+#include "Mesh.h"
+
+// Empties every geometry container and the object / face-group tables; gl_texture is not touched.
+void Mesh::clear() {
+	faces.clear();
+	texures.clear();
+	normals.clear();
+	vertices.clear();
+	// region bookkeeping
+	fgroups.clear();
+	objects.clear();
+}
+
+// Resizes the mesh about the origin. Every x/y/z coordinate is DIVIDED by s, so s > 1 shrinks it.
+// NOTE(review): dividing looks inverted for a function named scale; the original comments
+// here were mis-encoded and unreadable. Confirm with the callers before changing it.
+void Mesh::scale(float s)
+{
+	// The pivot is fixed at the origin rather than get_center(), so the subtraction is a no-op.
+	const glm::vec4 pivot(glm::vec3(0.0f, 0.0f, 0.0f), 0);
+	for (auto &vertex : vertices)
+	{
+		vertex -= pivot;
+		vertex.x /= s;
+		vertex.y /= s;
+		vertex.z /= s;
+	}
+}
+
+// Shifts every vertex by (x, y, z); the w component is left unchanged.
+void Mesh::translate(float x, float y, float z)
+{
+	const glm::vec4 offset(x, y, z, 0);
+	for (auto &vertex : vertices) {
+		vertex += offset;
+	}
+}
+
+// Rotates the mesh about the mean of its vertices by the angles (x, y, z), given in degrees.
+void Mesh::rotation(float x, float y, float z)
+{
+	// degrees -> radians, using the 3.1415f approximation of pi
+	x = x / 180.0f * 3.1415f;
+	y = y / 180.0f * 3.1415f;
+	z = z / 180.0f * 3.1415f;
+
+	// GLM builds a matrix from columns, so each inner brace list below is one column.
+	glm::mat4x4 Rx = {
+		{ 1.0, 0.0, 0.0, 0.0 },
+		{ 0.0, cos(x), -sin(x), 0.0 },
+		{ 0.0, sin(x), cos(x), 0.0 },
+		{ 0.0, 0.0, 0.0, 1.0 }
+	};
+	glm::mat4x4 Ry = {
+		{ cos(y), 0, -sin(y), 0 },
+		{ 0, 1.0, 0, 0 },
+		{ sin(y), 0, cos(y), 0 },
+		{ 0, 0, 0, 1.0 }
+	};
+	glm::mat4x4 Rz = {
+		{ cos(z), -sin(z), 0, 0 },
+		{ sin(z), cos(z), 0, 0 },
+		{ 0, 0, 1, 0 },
+		{ 0, 0, 0, 1 }
+	};
+	const glm::mat4x4 R = Rx * Ry * Rz;
+
+	// Move the pivot to the origin, rotate, then move it back.
+	const glm::vec4 pivot(get_center(), 0.0);
+	for (size_t i = 0; i < vertices.size(); ++i)
+	{
+		vertices[i] = R * (vertices[i] - pivot) + pivot;
+	}
+}
+
+// Pushes every vertex `dist` along its normal. Vertices without a matching normal are left
+// where they are instead of reading past the end of `normals`.
+void Mesh::extend(float dist)
+{
+	const size_t count = vertices.size() < normals.size() ? vertices.size() : normals.size();
+	for (size_t i = 0; i < count; ++i)
+		vertices[i] += dist * glm::vec4(normals[i], 0);
+}
+
+// Returns the arithmetic mean of all vertex positions (w ignored), or the origin for an empty mesh.
+glm::vec3 Mesh::get_center()
+{
+	glm::vec3 center(0.0f);	// explicit zero: GLM's default constructor may leave the components uninitialised
+	if (vertices.empty())
+		return center;	// avoids the 0/0 division below
+	for (const glm::vec4 &v : vertices)
+		center += glm::vec3(v.x, v.y, v.z);
+	center /= static_cast<float>(vertices.size());
+	return center;
+}
+
+// Writes (x/w, y/w, z/w) for every vertex into `result`, resizing it to the vertex count.
+void Mesh::get_euler_coordinates(Vec3s &result) const
+{
+	const size_t count = vertices.size();
+	result.resize(count);
+	for (size_t idx = 0; idx < count; ++idx)
+	{
+		const glm::vec4 &v = vertices[idx];
+		result[idx] = glm::vec3(v.x / v.w, v.y / v.w, v.z / v.w);
+	}
+}
+
+// Builds a flat vertex -> adjacent-face table with `maxneighbor` slots per vertex.
+// Row v occupies adjface[v * maxneighbor] .. adjface[v * maxneighbor + maxneighbor - 1].
+// Faces are recorded in ascending face order; faces beyond the first `maxneighbor` are dropped.
+// A row that is not full gets `pad` in the slot right after its last face.
+// Slots after the pad keep whatever resize() left in them.
+void Mesh::get_vertex_adjface_matrix(
+	std::vector<unsigned int> &adjface, 
+	unsigned int maxneighbor, 
+	unsigned int pad) const
+{
+	const size_t vertex_count = vertices.size();
+
+	// filled[v] = number of faces stored so far in row v
+	std::vector<size_t> filled(vertex_count, 0);
+	adjface.resize(vertex_count * maxneighbor);
+
+	for (unsigned int face_id = 0; face_id < faces.size(); ++face_id)
+	{
+		const Face &face = faces[face_id];
+		const unsigned int corners[3] = { face.v0, face.v1, face.v2 };
+
+		for (int c = 0; c < 3; ++c)
+		{
+			const unsigned int v = corners[c];
+			if (filled[v] >= maxneighbor)
+			{
+				continue;	// row already full
+			}
+			adjface[v * maxneighbor + filled[v]] = face_id;
+			++filled[v];
+		}
+	}
+
+	// terminate the partially filled rows
+	for (unsigned int v = 0; v < vertex_count; ++v)
+	{
+		if (filled[v] < maxneighbor)
+		{
+			adjface[v * maxneighbor + filled[v]] = pad;
+		}
+	}
+}
\ No newline at end of file
diff --git a/Mesh.h b/Mesh.h
new file mode 100644
index 0000000..72c148a
--- /dev/null
+++ b/Mesh.h
@@ -0,0 +1,74 @@
+#pragma once
+
+#pragma once
+#include <vector>
+#include <string>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+#include <glm/glm.hpp>
+
+#include "Common.h"
+
+class Mesh
+{
+public:
+#if 0
+	typedef std::vector<std::vector<unsigned int>> V2FIndices;
+	typedef std::vector<std::vector<unsigned int>> V2VIndices;
+
+	struct Indices
+	{
+		F2VIndices f2v;
+		F2TIndices f2t;
+		F2NIndices f2n;
+	};
+#endif
+
+	struct Face // one triangle: three indices into `vertices`
+	{
+		unsigned int v0;
+		unsigned int v1;
+		unsigned int v2;
+	};
+	typedef std::vector<Face> Faces;
+
+	typedef std::vector<std::pair<std::string, unsigned int>> MeshObjects; // (name, count) pairs filled by ObjLoader
+	typedef std::vector<std::pair<std::string, unsigned int>> FaceGroups;  // (name, count) pairs filled by ObjLoader
+
+public:
+	void clear(); // empties all containers below
+	void scale(float s); // NOTE: divides the coordinates by s
+	void translate(float x, float y, float z);
+	void rotation(float x, float y, float z); // angles in degrees, about get_center()
+
+	// Moves each vertex `dist` along its normal (unlike scale(), which resizes about the origin)
+	void extend(float dist);
+	glm::vec3 get_center(); // mean of the vertex positions
+
+	void get_euler_coordinates(Vec3s &result) const; // (x/w, y/w, z/w) per vertex
+	
+	// Builds the flat vertex -> adjacent-face table (maxneighbor slots per vertex, `pad` after the last face)
+	void get_vertex_adjface_matrix(
+		std::vector<unsigned int> &vertex_adjface, 
+		unsigned int maxneighbor, 
+		unsigned int pad
+	) const;
+
+public:
+	Vec4s vertices; // positions; ObjLoader stores them with w = 1
+	Vec3s normals;
+	Vec2s texures;  // texture coordinates
+
+	Faces faces; 
+
+	// for vertices region division
+	MeshObjects objects;  
+	FaceGroups  fgroups;
+
+	GLuint gl_texture; // GL texture name, set by ObjLoader when a texture is loaded
+
+	friend class ObjLoader;
+};
+
+
diff --git a/ObjLoader.cpp b/ObjLoader.cpp
new file mode 100644
index 0000000..1c9e237
--- /dev/null
+++ b/ObjLoader.cpp
@@ -0,0 +1,275 @@
+#include <iostream>
+#include <fstream>
+#include <sstream>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+
+#include <FreeImage.h>
+
+#include "ObjLoader.h"
+
+using namespace std;
+
+ObjLoader::ObjLoader()
+{	// Each rule reads "if token[id] == cond, call parser"; parse() tries them in this order and stops at the first match.
+	install_parser(0, "mtllib", &ObjLoader::parse_mt);               // material library name
+	install_parser(1, "object", &ObjLoader::parse_obj_name);         // matched on the 2nd token; name is the 3rd
+	install_parser(2, "mesh.vertices", &ObjLoader::parse_obj_mesh);  // matched on the 3rd token; count is the 2nd
+	install_parser(0, "v", &ObjLoader::parse_v);                     // vertex position
+	install_parser(0, "vn", &ObjLoader::parse_vn);                   // vertex normal
+	install_parser(0, "vt", &ObjLoader::parse_vt);                   // texture coordinate
+	install_parser(0, "g", &ObjLoader::parse_fgroup_name);           // face group name
+	install_parser(2, "polygons", &ObjLoader::parse_fgroup_mesh);    // matched on the 3rd token; size is the 5th field
+	install_parser(0, "f", &ObjLoader::parse_f);                     // triangular face
+}
+
+// Hands one tokenised line to the first installed parser whose key matches; unmatched lines are ignored.
+void ObjLoader::parse(Mesh &mesh, std::string token[4], std::string &buffer)
+{
+	for (Parsers::iterator it = _parsers.begin(); it != _parsers.end(); ++it)
+	{
+		if (token[it->id] != it->cond)
+			continue;
+		(this->*(it->parser))(mesh, token, buffer);
+		break;	// first match wins
+	}
+}
+
+// Appends a dispatch rule: call `parser` when token[id] equals `cond`.
+// Rules are tried in the order they were installed.
+void ObjLoader::install_parser(int id, const std::string &cond, Parser parser)
+{
+	// aggregate-initialised in declaration order: id, cond, parser
+	const PaserItem rule = { id, cond, parser };
+
+	_parsers.push_back(rule);
+}
+
+void ObjLoader::parse_mt(Mesh &mesh, std::string token[4], std::string &buffer) {
+	// "mtllib <file>": keep the material library name for parse_mt_file()
+	_mtlfile.assign(token[1]);
+}
+
+void ObjLoader::parse_obj_name(Mesh &mesh, std::string token[4], std::string &buffer) {
+	// start a new object record named token[2]; its count (0 for now) is filled in by parse_obj_mesh()
+	mesh.objects.push_back(std::pair<std::string, unsigned int>(token[2], 0));
+}
+
+void ObjLoader::parse_obj_mesh(Mesh &mesh, std::string token[4], std::string &buffer) {
+	// A count line seen before any "object" line has nothing to attach to; back() on an empty vector is UB.
+	if (!mesh.objects.empty()) mesh.objects.back().second = atoi(token[1].c_str());
+}
+
+void ObjLoader::parse_fgroup_name(Mesh &mesh, std::string token[4], std::string &buffer) {
+	// "g <name>": open a new face group; its size (0 for now) is set by parse_fgroup_mesh()
+	mesh.fgroups.push_back(std::pair<std::string, unsigned int>(token[1], 0));
+}
+
+void ObjLoader::parse_fgroup_mesh(Mesh &mesh, std::string token[4], std::string &buffer) {
+	if (mesh.fgroups.empty()) return;	// no "g" line yet: back() on an empty vector would be UB
+	stringstream tem_line(buffer);
+	string	f00, f11, f22, f33, f44;
+	tem_line >> f00 >> f11 >> f22 >> f33 >> f44;
+	mesh.fgroups.back().second = atoi(f44.c_str());	// the 5th field of the line is the group's size
+}
+
+// "v x y z": appends one position with w = 1.
+void ObjLoader::parse_v(Mesh &mesh, std::string token[4], std::string &buffer) {
+	const double x = atof(token[1].c_str()), y = atof(token[2].c_str()), z = atof(token[3].c_str());
+	mesh.vertices.push_back(glm::vec4(x, y, z, 1.0));
+}
+// "vn x y z": appends one normal.
+void ObjLoader::parse_vn(Mesh &mesh, std::string token[4], std::string &buffer) {
+	const double nx = atof(token[1].c_str()), ny = atof(token[2].c_str()), nz = atof(token[3].c_str());
+	mesh.normals.push_back(glm::vec3(nx, ny, nz));
+}
+// "vt u v": appends one texture coordinate.
+void ObjLoader::parse_vt(Mesh &mesh, std::string token[4], std::string &buffer) {
+	const double u = atof(token[1].c_str()), v = atof(token[2].c_str());
+	mesh.texures.push_back(glm::vec2(u, v));
+}
+// Parses one triangular face record ("f a b c") and appends it to the mesh.
+// Each corner may be written as "v", "v/t", "v//n" or "v/t/n" (1-based indices, stored 0-based):
+//   * a missing texture index falls back to the corner number (0, 1, 2), which is the
+//     default the plain "v" form has always used;
+//   * a missing normal index falls back to the vertex index, again as in the "v" form.
+// Previously "v/t" tripped an assert (or mis-read the normal in release builds) and
+// "v//n" produced a texture index of -1 that wrapped to a huge unsigned value.
+// Only the first three corners are read; negative (relative) indices are not supported.
+void ObjLoader::parse_f(Mesh &mesh, std::string token[4], std::string &buffer)
+{
+	unsigned int fi[3];	// vertex indices
+	unsigned int ti[3];	// texture-coordinate indices
+	unsigned int ni[3];	// normal indices
+	for (int i = 0; i < 3; ++i)
+	{
+		const string &corner = token[i + 1];
+		if (corner.empty())
+			return;	// fewer than three corners: not a usable triangle
+
+		// Locate the (up to two) separators of this corner.
+		const size_t slash1 = corner.find('/');
+		size_t slash2 = string::npos;
+		if (slash1 != string::npos)
+		{
+			slash2 = corner.find('/', slash1 + 1);
+		}
+
+		// Vertex index: everything before the first separator (the whole token if there is none).
+		fi[i] = atoi(corner.substr(0, slash1).c_str()) - 1;
+
+		// Texture index: the text between the separators, if there is any.
+		string tex;
+		if (slash1 != string::npos)
+		{
+			const size_t len = (slash2 == string::npos) ? string::npos : slash2 - slash1 - 1;
+			tex = corner.substr(slash1 + 1, len);
+		}
+		ti[i] = tex.empty() ? static_cast<unsigned int>(i) : atoi(tex.c_str()) - 1;
+
+		// Normal index: the text after the second separator, if there is any.
+		string nor;
+		if (slash2 != string::npos)
+		{
+			nor = corner.substr(slash2 + 1);
+		}
+		ni[i] = nor.empty() ? fi[i] : atoi(nor.c_str()) - 1;
+	}
+
+	Mesh::Face face;
+	face.v0 = fi[0];
+	face.v1 = fi[1];
+	face.v2 = fi[2];
+	mesh.faces.push_back(face);
+
+	// Keep the three index streams in lock-step; unified() walks them together.
+	for (int i = 0; i < 3; ++i)
+	{
+		_v_indices.push_back(fi[i]);
+		_t_indices.push_back(ti[i]);
+		_n_indices.push_back(ni[i]);
+	}
+}
+
+// Scans the material library named by the "mtllib" record for its first "map_Ka" entry and
+// remembers that texture file name in _texfile. Does nothing if no library was named.
+void ObjLoader::parse_mt_file(Mesh &mesh)
+{
+	if (_mtlfile.empty())
+		return;
+	// read material, here we just read the texture and only 1 photo
+	const string mtlpath = _path + _mtlfile;	// _mtlfile itself stays as parsed
+	ifstream input(mtlpath);
+	if (!input)
+	{
+		cerr << "error: unable to open input file: " << mtlpath << endl;
+		exit(-1);
+	}
+	// Loop on getline() itself rather than on eof(), which only becomes true after a read
+	// has already failed. The two fields are scoped to the line so that a short or blank
+	// line can never be matched against tokens left over from the previous one.
+	string buffer;
+	while (getline(input, buffer))
+	{
+		string key, value;
+		stringstream line(buffer);
+		line >> key >> value;
+		if (key == "map_Ka" && !value.empty())
+		{
+			_texfile = value;
+			break;
+		}
+	}
+}
+
+// Loads the PNG named by _texfile (relative to the model's directory) and uploads it as an
+// RGBA GL_TEXTURE_2D whose name is stored in mesh.gl_texture. OpenGL must be initialised
+// before this is called. Does nothing when no texture was named or the image fails to load.
+void ObjLoader::parse_texure_file(Mesh &mesh)
+{
+	if (_texfile.empty())
+		return;
+	const string texpath = _path + _texfile;	// _texfile itself stays as parsed
+	FIBITMAP *dib = FreeImage_Load(FIF_PNG, texpath.c_str(), PNG_DEFAULT);
+	if (dib == NULL)	// must be checked before any other FreeImage call touches dib
+		return;
+	if (FreeImage_GetBPP(dib) != 32)
+	{
+		// The conversion returns a new bitmap; release the source so it does not leak.
+		FIBITMAP *converted = FreeImage_ConvertTo32Bits(dib);
+		FreeImage_Unload(dib);
+		dib = converted;
+		if (dib == NULL)
+			return;
+	}
+
+	const unsigned int width = FreeImage_GetWidth(dib);
+	const unsigned int height = FreeImage_GetHeight(dib);
+	const size_t pixel_count = static_cast<size_t>(width) * height;
+	// Copy the pixels with bytes 0 and 2 swapped (presumably BGRA -> RGBA; depends on FreeImage's layout).
+	std::vector<BYTE> rgba(pixel_count * 4);
+	const BYTE *pixels = FreeImage_GetBits(dib);
+	for (size_t pix = 0; pix < pixel_count; ++pix)
+	{
+		rgba[pix * 4 + 0] = pixels[pix * 4 + 2];
+		rgba[pix * 4 + 1] = pixels[pix * 4 + 1];
+		rgba[pix * 4 + 2] = pixels[pix * 4 + 0];
+		rgba[pix * 4 + 3] = pixels[pix * 4 + 3];
+	}
+
+	glGenTextures(1, &mesh.gl_texture);
+	glBindTexture(GL_TEXTURE_2D, mesh.gl_texture);
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MIN_FILTER, GL_LINEAR);
+	glTexParameteri(GL_TEXTURE_2D, GL_TEXTURE_MAG_FILTER, GL_LINEAR);
+	glTexImage2D(GL_TEXTURE_2D, 0, GL_RGBA, width, height, 0, GL_RGBA, GL_UNSIGNED_BYTE, rgba.data());
+	FreeImage_Unload(dib);
+}
+
+// Re-indexes texture coordinates and normals per vertex; corners with out-of-range indices are skipped (entry stays zero).
+void ObjLoader::unified(Mesh &mesh)
+{
+	Vec2s uni_tex(mesh.vertices.size(), glm::vec2(0.0f));
+	Vec3s uni_normals(mesh.vertices.size(), glm::vec3(0.0f));
+	for (size_t i = 0; i < _v_indices.size(); ++i) {
+		const unsigned int v = _v_indices[i];
+		if (v >= mesh.vertices.size()) continue;	// face refers to a vertex that was never defined
+		if (_t_indices[i] < mesh.texures.size()) uni_tex[v] = mesh.texures[_t_indices[i]];
+		if (_n_indices[i] < mesh.normals.size()) uni_normals[v] = mesh.normals[_n_indices[i]];
+	}
+	mesh.texures.swap(uni_tex);
+	mesh.normals.swap(uni_normals);
+}
+
+// Loads the OBJ file `file` into `mesh` (replacing its contents); exits the process if it cannot be opened.
+void ObjLoader::load(Mesh &mesh, const std::string &file)
+{
+	// Reset everything a previous load() left behind, including the material / texture names.
+	mesh.clear();
+	_v_indices.clear();
+	_t_indices.clear();
+	_n_indices.clear();
+	_mtlfile.clear();
+	_texfile.clear();
+	_path = file.substr(0, file.find_last_of("/\\") + 1);	// directory part; either separator
+	ifstream input(file);
+	if (!input)
+	{
+		cout << "error: unable to open input file: " << file << endl;
+		exit(-1);
+	}
+
+	// Loop on getline(), not eof(), and rebuild the tokens for every line: a failed extraction
+	// leaves a string untouched, so blank lines and the end of file used to replay the last record.
+	string buffer;
+	while (getline(input, buffer))
+	{
+		string f[4];
+		stringstream line(buffer);
+		line >> f[0] >> f[1] >> f[2] >> f[3];
+		parse(mesh, f, buffer); 
+	}
+	parse_mt_file(mesh); 
+	parse_texure_file(mesh); 
+	unified(mesh); 
+}
diff --git a/ObjLoader.h b/ObjLoader.h
new file mode 100644
index 0000000..552b3b2
--- /dev/null
+++ b/ObjLoader.h
@@ -0,0 +1,67 @@
+#pragma once
+#include <list>
+#include <vector>
+#include <string>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+#include <glm/glm.hpp>
+
+#include "Common.h"
+#include "Mesh.h"
+
+class ObjLoader // loads an OBJ file (plus its first map_Ka texture) into a Mesh
+{
+private:
+	typedef std::vector<unsigned int> F2VIndices; // per face corner: vertex index
+	typedef std::vector<unsigned int> F2TIndices; // per face corner: texture-coordinate index
+	typedef std::vector<unsigned int> F2NIndices; // per face corner: normal index
+
+	typedef void (ObjLoader::*Parser)(Mesh &mesh, std::string token[4], std::string &buffer);
+	struct PaserItem // one dispatch rule: call `parser` when token[id] == cond
+	{
+		int id;
+		std::string cond;
+		Parser parser;
+	};
+
+	typedef std::list<PaserItem> Parsers;
+	
+public:
+	ObjLoader(); 
+	void load(Mesh &mesh, const std::string &file);
+
+private:
+	void install_parser(int id, const std::string &cond, Parser parser);
+	void parse(Mesh &mesh, std::string token[4], std::string &buffer); // first matching rule wins
+	
+	// unify the data so that one vertex -> one normal -> one texture coordinate;
+	// otherwise errors occur while rendering
+	void unified(Mesh &mesh);
+
+	void parse_mt(Mesh &mesh, std::string token[4], std::string &buffer); 
+	void parse_obj_name(Mesh &mesh, std::string token[4], std::string &buffer);
+	void parse_obj_mesh(Mesh &mesh, std::string token[4], std::string &buffer);
+	void parse_fgroup_name(Mesh &mesh, std::string token[4], std::string &buffer);
+	void parse_fgroup_mesh(Mesh &mesh, std::string token[4], std::string &buffer);
+	void parse_v(Mesh &mesh, std::string token[4], std::string &buffer);
+	void parse_vn(Mesh &mesh, std::string token[4], std::string &buffer);
+	void parse_vt(Mesh &mesh, std::string token[4], std::string &buffer);
+	void parse_f(Mesh &mesh, std::string token[4], std::string &buffer); 
+
+	void parse_mt_file(Mesh &mesh);     // reads the material library, looking for map_Ka
+	void parse_texure_file(Mesh &mesh); // loads the texture image into a GL texture
+
+private:
+	F2VIndices _v_indices; // the three streams below are appended in lock-step by parse_f()
+	F2TIndices _t_indices;
+	F2NIndices _n_indices;
+
+	std::string _path;    // directory part of the loaded file's path
+	std::string _texfile; // texture name taken from the material library
+	std::string _mtlfile; // name given by the "mtllib" record
+
+	Parsers _parsers; // dispatch rules, in installation order
+};
+
+
diff --git a/Simulator.cu b/Simulator.cu
new file mode 100644
index 0000000..fa58d90
--- /dev/null
+++ b/Simulator.cu
@@ -0,0 +1,322 @@
+#include <iostream>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+#include <cuda_runtime.h>
+#include <cuda_gl_interop.h>
+
+#include "Utilities.h"
+#include "SpringsBuilder.h"
+#include "bvh\BRTreeNode.h"
+#include "bvh\BVHAccel.h"
+#include "Cloth.h"
+#include "ObjLoader.h"
+#include "Verlet.h"
+#include "Simulator.h"
+
+using namespace std;
+
+extern GLenum GL_MODE;
+
+// Creates a simulator that owns no device memory yet. Every counter starts at
+// zero and every device pointer at NULL, so ref_auto_clean() and visulize()
+// never see indeterminate values before the first update_cloth() call.
+Simulator::Simulator() :
+	_size_faces(0), _size_vertices(0),
+	_num_faces(0), _num_vertices(0), _num_textures(0),
+	_id_in(0), _id_out(1),
+	_d_x_orignal(NULL),
+	_d_x_cur_in(NULL), _d_x_cur_out(NULL),
+	_d_x_lst_in(NULL), _d_x_lst_out(NULL),
+	_d_dir_collision_force(NULL),
+	_d_dir_face_normals(NULL),
+	_d_adj_spring_st(NULL),
+	_d_adj_spring_bd(NULL),
+	_d_adj_face_to_vertices(NULL),
+	_d_adj_vertex_to_faces(NULL),
+#ifdef DEBUG_COLLISION
+	collided_vertex(NULL),
+#endif
+	_d_vbo_resource(NULL),
+	_d_vbo_vertices(NULL),
+	_d_vbo_normals(NULL)
+{
+	// The two ping-pong slots are arrays, so they are cleared here rather
+	// than in the initializer list.
+	_d_x_cur[0] = NULL;
+	_d_x_cur[1] = NULL;
+	_d_x_lst[0] = NULL;
+	_d_x_lst[1] = NULL;
+
+}
+
+// Gives up this object's reference. RefObject::ref_release() calls
+// ref_auto_clean() when the count reaches zero.
+Simulator::~Simulator()
+{
+	this->ref_release();
+}
+
+// Rebuilds both collision trees from a body mesh. A mesh without vertices is
+// ignored (a short notice is printed).
+void Simulator::update_body(const Mesh &body)
+{
+	const bool nothing_to_build = body.vertices.empty();
+	if (nothing_to_build)
+	{
+		cout << "return" << endl;
+		return;
+	}
+
+	// _bvh_tree is built from a copy passed through Mesh::extend(0.01f);
+	// _ccd_tree is built from the body exactly as given.
+	Mesh inflated(body);
+	inflated.extend(0.01f);
+	_bvh_builder.build_bvh(_bvh_tree, inflated);
+
+	_ccd_builder.build_ccd(_ccd_tree, body);
+}
+
+// Allocates every device buffer from the current capacities (_size_vertices,
+// _size_faces) and points the in/out aliases at the ping-pong slots selected
+// by _id_in / _id_out. Callers release the previous buffers first (see
+// update_cloth()).
+void Simulator::create_buffer()
+{
+	size_t heap_size = 256 * 1024 * 1024;  //set heap size, the default is 8M
+	// NOTE(review): the result is left unchecked as in the original; the CUDA
+	// documentation says this limit can no longer be changed once a kernel
+	// using device malloc()/free() has been launched.
+	cudaDeviceSetLimit(cudaLimitMallocHeapSize, heap_size);
+
+	// Position buffers: the rest pose plus two (current, last) ping-pong pairs.
+	const size_t vertices_bytes = sizeof(glm::vec3) * _size_vertices;
+	safe_cuda(cudaMalloc((void**)&_d_x_orignal, vertices_bytes));
+	safe_cuda(cudaMalloc((void**)&_d_x_cur[0], vertices_bytes));
+	safe_cuda(cudaMalloc((void**)&_d_x_cur[1], vertices_bytes));
+	safe_cuda(cudaMalloc((void**)&_d_x_lst[0], vertices_bytes));
+	safe_cuda(cudaMalloc((void**)&_d_x_lst[1], vertices_bytes));
+
+	_d_x_cur_in = _d_x_cur[_id_in];
+	_d_x_lst_in = _d_x_lst[_id_in];
+
+	_d_x_cur_out = _d_x_cur[_id_out];
+	_d_x_lst_out = _d_x_lst[_id_out];
+
+	safe_cuda(cudaMalloc((void**)&_d_dir_collision_force, sizeof(glm::vec3) * _size_vertices));
+	// One normal per face.
+	safe_cuda(cudaMalloc((void**)&_d_dir_face_normals, sizeof(glm::vec3) * _size_faces));
+
+	// Vertex -> adjacent faces, NUM_PER_VERTEX_ADJ_FACES slots per vertex.
+	safe_cuda(cudaMalloc((void**)&_d_adj_vertex_to_faces, sizeof(unsigned int) * _size_vertices * sim_parameter.NUM_PER_VERTEX_ADJ_FACES));
+	// Face -> its three vertex indices.
+	safe_cuda(cudaMalloc((void**)&_d_adj_face_to_vertices, sizeof(unsigned int) * _size_faces * 3));
+
+	// Spring tables. The bend table must use NUM_PER_VERTEX_SPRING_BEND slots
+	// per vertex because SpringsBuilder::build() uploads that many.
+	safe_cuda(cudaMalloc((void**)&_d_adj_spring_st, sizeof(unsigned int) * _size_vertices * sim_parameter.NUM_PER_VERTEX_SPRING_STRUCT));
+	safe_cuda(cudaMalloc((void**)&_d_adj_spring_bd, sizeof(unsigned int) * _size_vertices * sim_parameter.NUM_PER_VERTEX_SPRING_BEND));
+
+#ifdef DEBUG_COLLISION
+	safe_cuda(cudaMalloc((void**)&collided_vertex, sizeof(unsigned int) * _size_vertices));
+#endif
+}
+
+
+// RefObject hook: frees every buffer allocated by create_buffer(). Each
+// pointer is reset to NULL afterwards so a later clean (for example after a
+// failed re-allocation) cannot free the same block twice, and the in/out
+// aliases never dangle. The registered VBO resource is intentionally not
+// touched here: update_cloth() registers it before calling ref_renew().
+void Simulator::ref_auto_clean()
+{
+	cudaFree(_d_x_orignal);            _d_x_orignal = NULL;
+	cudaFree(_d_x_cur[0]);             _d_x_cur[0] = NULL;
+	cudaFree(_d_x_cur[1]);             _d_x_cur[1] = NULL;
+	cudaFree(_d_x_lst[0]);             _d_x_lst[0] = NULL;
+	cudaFree(_d_x_lst[1]);             _d_x_lst[1] = NULL;
+	cudaFree(_d_dir_collision_force);  _d_dir_collision_force = NULL;
+	cudaFree(_d_dir_face_normals);     _d_dir_face_normals = NULL;
+
+	cudaFree(_d_adj_face_to_vertices); _d_adj_face_to_vertices = NULL;
+	cudaFree(_d_adj_vertex_to_faces);  _d_adj_vertex_to_faces = NULL;
+	cudaFree(_d_adj_spring_st);        _d_adj_spring_st = NULL;
+	cudaFree(_d_adj_spring_bd);        _d_adj_spring_bd = NULL;
+
+	// The aliases pointed into the slots released above.
+	_d_x_cur_in = _d_x_cur_out = NULL;
+	_d_x_lst_in = _d_x_lst_out = NULL;
+
+#ifdef DEBUG_COLLISION
+	cudaFree(collided_vertex);         collided_vertex = NULL;
+#endif
+}
+
+// Uploads a cloth to the GPU: registers its OpenGL array buffer with CUDA,
+// grows the device buffers when the cloth no longer fits, copies positions,
+// face indices and vertex->face adjacency, and lets SpringsBuilder upload the
+// spring tables.
+void Simulator::update_cloth(const Cloth &cloth)
+{
+	// Release the registration left by a previous call; registering again
+	// without this would leak the old cudaGraphicsResource.
+	if (_d_vbo_resource != NULL)
+	{
+		safe_cuda(cudaGraphicsUnregisterResource(_d_vbo_resource));
+		_d_vbo_resource = NULL;
+	}
+
+	//register vbo
+	safe_cuda(cudaGraphicsGLRegisterBuffer(
+		&_d_vbo_resource, cloth.vbo.array_buffer, cudaGraphicsMapFlagsWriteDiscard));
+
+	_num_textures = cloth.texures.size();
+	_num_vertices = cloth.vertices.size();
+	_num_faces = cloth.faces.size();
+
+	if (_size_vertices < _num_vertices || _size_faces < _num_faces)
+	{
+		// Capacity follows the vertex count (it used to follow the texture
+		// count, which under-allocates when a mesh has fewer texture
+		// coordinates than vertices).
+		_size_vertices = _num_vertices;
+		_size_faces = _num_faces;
+
+		ref_renew();
+		create_buffer();
+	}
+
+	// Every vertex gets NUM_PER_VERTEX_ADJ_FACES adjacency slots; UINT_MAX is
+	// passed to the mesh as the filler value for unused slots.
+	std::vector<unsigned int> vertex_adjfaces(_num_vertices * sim_parameter.NUM_PER_VERTEX_ADJ_FACES);
+	cloth.get_vertex_adjface_matrix(vertex_adjfaces, sim_parameter.NUM_PER_VERTEX_ADJ_FACES, UINT_MAX);
+
+	// The simulation works on vec3 positions; the mesh stores vec4.
+	const size_t vertices_bytes = sizeof(glm::vec3) * _num_vertices;
+
+	Vec3s cloth_v3(_num_vertices);
+	for (size_t idx = 0; idx < _num_vertices; ++idx)
+	{
+		glm::vec4 v = cloth.vertices[idx];
+		cloth_v3[idx] = glm::vec3(v.x, v.y, v.z);
+	}
+
+	// data() instead of &v[0]: valid even when the cloth is empty.
+	safe_cuda(cudaMemcpy(_d_x_orignal, cloth_v3.data(), vertices_bytes, cudaMemcpyHostToDevice));
+	safe_cuda(cudaMemcpy(_d_x_cur_in, cloth_v3.data(), vertices_bytes, cudaMemcpyHostToDevice));
+	safe_cuda(cudaMemcpy(_d_x_lst_in, cloth_v3.data(), vertices_bytes, cudaMemcpyHostToDevice));
+
+	// Face -> vertex indices.
+	// NOTE(review): copies 3 unsigned ints per face straight out of
+	// cloth.faces, i.e. assumes a face is exactly three packed indices -
+	// confirm against the Face definition.
+	const size_t vertices_index_bytes = sizeof(unsigned int) * _num_faces * 3;
+	if (_num_faces > 0)
+	{
+		safe_cuda(cudaMemcpy(_d_adj_face_to_vertices, &cloth.faces[0], vertices_index_bytes, cudaMemcpyHostToDevice));
+	}
+
+	//initilize to 0
+	safe_cuda(cudaMemset(_d_dir_collision_force, 0, sizeof(glm::vec3) * _num_vertices));
+
+	const size_t vertex_adjface_bytes = sizeof(unsigned int) * _num_vertices * sim_parameter.NUM_PER_VERTEX_ADJ_FACES;
+	safe_cuda(cudaMemcpy(_d_adj_vertex_to_faces, vertex_adjfaces.data(), vertex_adjface_bytes, cudaMemcpyHostToDevice));
+
+	// Build the spring adjacency on the host and upload it.
+	_springs_builder.build(cloth, _d_adj_spring_st, _d_adj_spring_bd);
+
+#ifdef DEBUG_COLLISION
+	//debug
+	// a safe_cuda(cudaMalloc((void**)&collided_vertex, sizeof(int) * _num_vertices));
+	cudaMemset(collided_vertex, 0, sizeof(int) * _num_vertices);
+	cpu_collided_veretx.resize(_num_vertices);
+	updated_vertex.resize(_num_vertices);
+	// NOTE(review): Simulator.h declares this debug member as `faces`, and
+	// `cloth.faces1` is not visible here - the debug path looks stale.
+	faces1 = cloth.faces1;
+#endif
+}
+
+// Runs one step of the `verlet` kernel over all cloth vertices against
+// _bvh_tree, waits for it to finish, then swaps the in/out buffers.
+void Simulator::simulate()
+{
+	// Nothing to do for an empty cloth; this also keeps computeGridSize()
+	// from being asked for a grid with zero threads.
+	if (_num_vertices == 0)
+	{
+		return;
+	}
+
+	unsigned int num_threads, num_blocks;
+
+	computeGridSize(_num_vertices, 512, num_blocks, num_threads);
+	verlet<<<num_blocks, num_threads>>>(
+		_bvh_tree,
+		_num_vertices,
+		_d_x_cur_in, _d_x_lst_in, _d_x_cur_out, _d_x_lst_out, _d_x_orignal,
+		_d_adj_spring_st, _d_adj_spring_bd,
+		_d_dir_collision_force
+#ifdef DEBUG_COLLISION
+		, collided_vertex
+#endif
+		);
+
+	// Report launch failures (bad configuration etc.), then wait for the kernel.
+	safe_cuda(cudaGetLastError());
+	safe_cuda(cudaDeviceSynchronize());
+
+#ifdef DEBUG_COLLISION
+	// Both host vectors were sized to _num_vertices in update_cloth().
+	// NOTE(review): _d_vbo_vertices is only assigned while the VBO is mapped
+	// in visulize(); reading through it here may be invalid - confirm.
+	cudaMemcpy(&cpu_collided_veretx[0], collided_vertex, sizeof(int)*_num_vertices, cudaMemcpyDeviceToHost);
+	cudaMemcpy(&updated_vertex[0], _d_vbo_vertices, sizeof(glm::vec4)*_num_vertices, cudaMemcpyDeviceToHost);
+	cout << "*****collided veretx index************" << endl;
+	for (size_t i = 0; i < cpu_collided_veretx.size(); i++)
+	{
+		if (cpu_collided_veretx[i] == 1)
+			cout << i << "  ";
+	}
+	cout << endl;
+#endif
+
+	swap_buffer();
+}
+
+// Runs the `CCD` kernel over all cloth vertices against _ccd_tree, waits for
+// it to finish, then swaps the in/out buffers.
+void Simulator::ccd()
+{
+	// Nothing to do for an empty cloth; this also keeps computeGridSize()
+	// from being asked for a grid with zero threads.
+	if (_num_vertices == 0)
+	{
+		return;
+	}
+
+	unsigned int num_threads, num_blocks;
+
+	computeGridSize(_num_vertices, 512, num_blocks, num_threads);
+	CCD<<<num_blocks, num_threads>>>(
+		_ccd_tree,
+		_num_vertices,
+		_d_x_cur_in, _d_x_lst_in, _d_x_cur_out, _d_x_lst_out, _d_x_orignal,
+		_d_dir_collision_force
+		);
+
+	// Report launch failures, then stop the CPU until the kernel has been executed.
+	safe_cuda(cudaGetLastError());
+	safe_cuda(cudaDeviceSynchronize());
+
+	swap_buffer();
+}
+
+// Publishes the current cloth state to OpenGL: maps the registered vertex
+// buffer, recomputes face normals, lets `show_vbo` write positions and
+// normals into the buffer, and unmaps it again.
+void Simulator::visulize()
+{
+	// No registered buffer (update_cloth() not called yet) or an empty mesh:
+	// there is nothing to map, and computeGridSize() must not see a zero count.
+	if (_d_vbo_resource == NULL || _num_vertices == 0 || _num_faces == 0)
+	{
+		return;
+	}
+
+	size_t num_bytes;
+	safe_cuda(cudaGraphicsMapResources(1, &_d_vbo_resource, 0));
+	safe_cuda(cudaGraphicsResourceGetMappedPointer((void **)&_d_vbo_vertices, &num_bytes, _d_vbo_resource));
+	
+	// Normals are addressed 4 floats per vertex plus 2 floats per texture
+	// coordinate past the start of the buffer.
+	// NOTE(review): this assumes the VBO is laid out as positions, texture
+	// coordinates, normals - confirm where the buffer is created.
+	_d_vbo_normals = (glm::vec3*)((float*)_d_vbo_vertices + 4 * _num_vertices + 2 * _num_textures);   
+	
+	unsigned int num_threads, num_blocks;		
+	computeGridSize(_num_faces, 512, num_blocks, num_threads);
+	get_face_normal<<<num_blocks, num_threads>>>(_num_faces, _d_x_cur_in, _d_adj_face_to_vertices, _d_dir_face_normals);
+	safe_cuda(cudaGetLastError());
+	safe_cuda(cudaDeviceSynchronize());
+
+	computeGridSize(_num_vertices, 512, num_blocks, num_threads);
+	show_vbo<<<num_blocks, num_threads>>>(_num_vertices, _d_vbo_vertices, _d_vbo_normals, _d_x_cur_in, _d_adj_vertex_to_faces, _d_dir_face_normals);
+	safe_cuda(cudaGetLastError());
+	safe_cuda(cudaDeviceSynchronize());
+	
+	safe_cuda(cudaGraphicsUnmapResources(1, &_d_vbo_resource, 0));
+}
+
+// Exchanges the roles of the two ping-pong slots and re-derives the four
+// in/out alias pointers from the new slot ids.
+void Simulator::swap_buffer()
+{
+	std::swap(_id_in, _id_out);
+
+	_d_x_cur_out = _d_x_cur[_id_out];
+	_d_x_lst_out = _d_x_lst[_id_out];
+	_d_x_cur_in = _d_x_cur[_id_in];
+	_d_x_lst_in = _d_x_lst[_id_in];
+}
+
+#ifdef DEBUG_COLLISION
+
+// Debug view (DEBUG_COLLISION only): draws every face of the cloth in white
+// using the global GL_MODE primitive, then draws each vertex flagged in
+// cpu_collided_veretx as a large red point. Uses the host copies filled by
+// simulate().
+void Simulator::draw_collided_vertex()
+{
+
+	//draw outline first
+	for (unsigned int i = 0; i < _num_faces; i++)
+	{
+		glm::vec4 ver[3];
+		for (int j = 0; j < 3; j++)
+		{
+			ver[j] = updated_vertex[faces1[i].vertex_index[j]];
+		}
+		glPointSize(1.0);
+		glBegin(GL_MODE);
+		glColor3f(1.0, 1.0, 1.0);
+		for (int j = 0; j < 3; j++)
+		{
+			glVertex3f(ver[j].x, ver[j].y, ver[j].z);
+		}
+
+		glEnd();
+	}
+
+
+	for (size_t i = 0; i < cpu_collided_veretx.size(); i++)
+	{
+		glm::vec4 v = updated_vertex[i];
+		if (cpu_collided_veretx[i] == 1)
+		{
+			//draw it
+			glPointSize(10.0);
+			glBegin(GL_POINTS);
+			glColor3f(1.0, 0, 0);
+			glVertex3f(v.x, v.y, v.z);
+			glEnd();
+		}
+	}
+}
+
+#endif
\ No newline at end of file
diff --git a/Simulator.h b/Simulator.h
new file mode 100644
index 0000000..9ea285d
--- /dev/null
+++ b/Simulator.h
@@ -0,0 +1,152 @@
+#pragma once
+#include <vector>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+#include <cuda_runtime.h>
+#include <cuda_gl_interop.h>
+
+#include <glm/glm.hpp>
+
+#include "SpringsBuilder.h"
+#include "bvh\BVHAccel.h"
+#include "bvh\BVHBuilder.h"
+
+class Cloth;
+class Mesh;
+class BRTreeNode;
+class Primitive;
+
+// Owns the GPU-side state of the cloth simulation: the body trees used by the
+// collision kernels, the cloth's ping-pong position buffers and adjacency
+// tables, and the CUDA handle of the OpenGL vertex buffer that gets rendered.
+// Derives from RefObject, so copies share one reference count and the device
+// buffers are released through ref_auto_clean().
+class Simulator : public RefObject
+{
+public:
+	Simulator();
+	~Simulator();
+
+	// Runs the `verlet` kernel against _bvh_tree, then swaps the in/out buffers.
+	void simulate();
+	// Runs the `CCD` kernel against _ccd_tree, then swaps the in/out buffers.
+	void ccd();
+
+	// Writes the current positions and normals into the mapped OpenGL buffer.
+	void visulize();
+
+	// debug
+	// NOTE(review): Simulator.cu defines this only when DEBUG_COLLISION is set.
+	void draw_collided_vertex();
+
+	// Upload a new cloth / rebuild the collision trees for a new body.
+	void update_cloth(const Cloth &cloth);
+	void update_body(const Mesh &body);
+
+private:
+	// RefObject hook: frees the device buffers.
+	virtual void ref_auto_clean();
+
+	// Allocates the device buffers from _size_vertices / _size_faces.
+	void create_buffer();
+
+	// NOTE(review): declared but not defined in Simulator.cu as visible here.
+	void verlet_cuda();
+
+	// NOTE(review): declared but not defined in Simulator.cu as visible here.
+	void ccd_cuda();
+
+	// Chooses a 1-D launch configuration for n work items: at most block_size
+	// threads per block and enough blocks to cover n. For n == 0 (or
+	// block_size == 0) both outputs are 0 instead of dividing by zero.
+	void computeGridSize(unsigned int n, unsigned int block_size, unsigned int &num_blocks, unsigned int &num_threads)
+	{
+		// Parenthesised so a function-like min() macro (such as the one
+		// <windows.h> defines without NOMINMAX) cannot expand here.
+		num_threads = (std::min)(block_size, n);
+		if (num_threads == 0)
+		{
+			num_blocks = 0;
+			return;
+		}
+		num_blocks = (n % num_threads != 0) ? (n / num_threads + 1) : (n / num_threads);
+	}
+
+	// Swaps the input and output roles of the two buffer slots. Each slot
+	// holds a (current, last) position pair, so four buffers exist in total;
+	// a kernel reads one slot and writes the other.
+	void swap_buffer();
+
+private:
+	////////////////// body collision trees ///////////////////
+	BVHBuilder _bvh_builder;
+	BVHBuilder _ccd_builder;
+	BVHAccel _bvh_tree;
+	BVHAccel _ccd_tree;
+
+	////////////////// cloth ///////////////////
+	SpringsBuilder _springs_builder;
+
+	// Capacity of the allocated device buffers.
+	unsigned int _size_faces;
+	unsigned int _size_vertices;
+
+	// Element counts of the cloth currently loaded.
+	unsigned int _num_faces;
+	unsigned int _num_vertices;
+	unsigned int _num_textures;
+
+	// Slot indices (0 or 1) currently used as kernel input and output;
+	// exchanged by swap_buffer().
+	int _id_in, _id_out;
+
+	// Everything below is a device pointer.
+private:
+	// Positions as uploaded by update_cloth(); passed to the kernels as x_orignal.
+	glm::vec3 *_d_x_orignal;
+
+	// Aliases into _d_x_cur / _d_x_lst for the slot read (in) and the slot
+	// written (out). They own no memory.
+	glm::vec3 *_d_x_cur_in, *_d_x_cur_out;
+	glm::vec3 *_d_x_lst_in, *_d_x_lst_out;
+
+	// The actual storage that the aliases above point into.
+	glm::vec3 *_d_x_cur[2];
+	glm::vec3 *_d_x_lst[2];
+
+	glm::vec3 *_d_dir_collision_force;		// one vec3 per vertex, zeroed by update_cloth()
+	glm::vec3 *_d_dir_face_normals;		// one normal per face
+
+	// Face -> vertices table: three indices per face, so vertex j of face i
+	// is stored at i * 3 + j.
+	unsigned int *_d_adj_face_to_vertices;
+
+	// Vertex -> faces table: NUM_PER_VERTEX_ADJ_FACES slots per vertex, with
+	// UINT_MAX handed to the mesh as the filler for unused slots. Passed to
+	// show_vbo together with the face normals.
+	unsigned int *_d_adj_vertex_to_faces;
+
+	// Vertex -> vertex table of structural springs (including the boundary
+	// springs SpringsBuilder adds): neighbour j of vertex i is stored at
+	// i * NUM_PER_VERTEX_SPRING_STRUCT + j; a UINT_MAX entry ends a short list.
+	unsigned int *_d_adj_spring_st;
+
+	// Vertex -> vertex table of bend springs (the two vertices opposite a
+	// shared edge): neighbour j of vertex i is stored at
+	// i * NUM_PER_VERTEX_SPRING_BEND + j; a UINT_MAX entry ends a short list.
+	unsigned int *_d_adj_spring_bd;
+
+	// CUDA registration of the cloth's OpenGL array buffer, plus the pointers
+	// obtained while it is mapped in visulize().
+	cudaGraphicsResource *_d_vbo_resource;
+	glm::vec4 *_d_vbo_vertices;           // vertex positions inside the mapped buffer
+	glm::vec3 *_d_vbo_normals;            // normals inside the mapped buffer
+
+
+	// Spring data now lives in _d_adj_spring_st / _d_adj_spring_bd; the old
+	// member is kept below for reference only.
+		// Springs *cuda_spring;
+
+#ifdef DEBUG_COLLISION
+	// debug
+	// NOTE(review): Simulator.cu refers to `faces1`, not `faces`, and `vector`
+	// is unqualified here - this section looks stale.
+	int *collided_vertex;
+	std::vector<int> cpu_collided_veretx;
+	Vec4s updated_vertex;
+	vector<Face> faces;
+#endif
+
+};
+
diff --git a/SpringsBuilder.cpp b/SpringsBuilder.cpp
new file mode 100644
index 0000000..3107854
--- /dev/null
+++ b/SpringsBuilder.cpp
@@ -0,0 +1,349 @@
+#include <string>
+#include <set>
+#include <algorithm>
+#include <iostream>
+#include <cuda_runtime.h>
+
+#include "kdtree.h"
+#include "Utilities.h"
+#include "Cloth.h"
+#include "SpringsBuilder.h"
+
+using namespace std;
+
+// Remembers, for every undirected edge seen so far, the vertex opposite that
+// edge in the first triangle that contained it.
+class Matrix
+{
+public:
+	// Registers edge (i, j) of a triangle whose third vertex is k.
+	// First sighting of the edge: k is stored and nothing is reported.
+	// Later sightings (in either orientation): the pair (stored vertex, k) is
+	// appended to value_inline - these are the two vertices facing each other
+	// across the shared edge.
+	void Insert_Matrix(unsigned int i, unsigned int j, unsigned int k, std::vector<std::pair<unsigned int, unsigned int>> &value_inline)
+	{
+		// Store edges with the smaller index first so one lookup covers both
+		// orientations.
+		const std::pair<unsigned int, unsigned int> edge = (i < j) ? std::make_pair(i, j) : std::make_pair(j, i);
+
+		const std::map<std::pair<unsigned int, unsigned int>, unsigned int>::iterator hit = mat.find(edge);
+		if (hit != mat.end())
+		{
+			value_inline.push_back(std::make_pair(hit->second, k));
+			return;
+		}
+
+		mat.insert(std::make_pair(edge, k));
+	}
+private:
+	std::map<std::pair<unsigned int, unsigned int>, unsigned int>  mat;
+};
+
+///////////////////////////////
+// True when `val` occurs anywhere in `array` (linear scan).
+bool SpringsBuilder::exist(const vector<unsigned int>& array, const unsigned int val)
+{
+	const vector<unsigned int>::const_iterator hit = find(array.begin(), array.end(), val);
+	return hit != array.end();
+}
+
+// Flattens a variable-length adjacency list into a table with `stride` slots
+// per vertex. Lists longer than `stride` are truncated; shorter lists are
+// terminated by one UINT_MAX sentinel (slots after the sentinel stay 0).
+static vector<unsigned int> pack_adjacency(const vector<vector<unsigned int>> &adjacency, const size_t stride)
+{
+	vector<unsigned int> table(adjacency.size() * stride, 0);
+	for (size_t v = 0; v < adjacency.size(); ++v)
+	{
+		const vector<unsigned int> &row = adjacency[v];
+		const size_t used = (row.size() < stride) ? row.size() : stride;
+		for (size_t j = 0; j < used; ++j)
+		{
+			table[v * stride + j] = row[j];
+		}
+		if (used < stride)
+		{
+			table[v * stride + used] = UINT_MAX;     //sentinel
+		}
+	}
+	return table;
+}
+
+// Builds the structural (neigh1) and bend (neigh2) spring adjacency of `cloth`
+// on the host and uploads both tables to the given device buffers.
+//   adj_spring_st: room for vertices * NUM_PER_VERTEX_SPRING_STRUCT entries
+//   adj_spring_bd: room for vertices * NUM_PER_VERTEX_SPRING_BEND entries
+// Always returns true; CUDA failures are only reported by safe_cuda.
+bool SpringsBuilder::build(const Cloth &cloth, unsigned int *adj_spring_st, unsigned int *adj_spring_bd)
+{
+	cout << "build springs" << endl;
+
+	// Start from a clean slate: all of these are members, so a second build()
+	// would otherwise append to the springs of the previous cloth.
+	cloth_boundary_springs.clear();
+	boundary_boundary_springs.clear();
+	boundary.clear();
+	neigh1.clear();
+	neigh2.clear();
+
+	if (cloth.get_obj_type() == SINGLE_LAYER_BOUNDARY)
+	{
+		// Order matters: the second step reads cloth_boundary_springs.
+		get_cloth_boundary_spring(cloth);
+		get_boundary_boundary_spring(cloth);
+	}
+
+	// Structural springs: every pair of vertices sharing a triangle, without
+	// duplicates.
+	neigh1.resize(cloth.vertices.size());
+	for (size_t i = 0; i < cloth.faces.size(); i++)
+	{
+		unsigned int f[3];
+		f[0] = cloth.faces[i].v0;
+		f[1] = cloth.faces[i].v1;
+		f[2] = cloth.faces[i].v2;
+
+		for (int a = 0; a < 3; ++a)
+		{
+			for (int b = 0; b < 3; ++b)
+			{
+				if (a != b && !exist(neigh1[f[a]], f[b]))
+					neigh1[f[a]].push_back(f[b]);
+			}
+		}
+	}
+
+	// Cloth-to-boundary springs are added in both directions.
+	for (size_t i = 0; i < cloth_boundary_springs.size(); i++)
+	{
+		unsigned int idx1 = cloth_boundary_springs[i].first;
+		unsigned int idx2 = cloth_boundary_springs[i].second;
+
+		neigh1[idx1].push_back(idx2);
+		neigh1[idx2].push_back(idx1);
+	}
+
+	// Springs between cloth pieces (derived from boundary pairs) are added as
+	// stored, in one direction per entry.
+	for (auto spring : boundary)
+		neigh1[spring.first].push_back(spring.second);
+
+	// Bend springs: for every edge shared by two triangles, connect the two
+	// vertices opposite that edge. Each triangle registers its three edges
+	// with the opposite vertex; a second registration of an edge yields a pair.
+	neigh2.resize(cloth.vertices.size());
+	Matrix NR;   //Neighbour Relation
+	vector<pair<unsigned int, unsigned int>> point_inline;
+
+	for (size_t i = 0; i < cloth.faces.size(); i++)
+	{
+		unsigned int f[3];
+		f[0] = cloth.faces[i].v0;
+		f[1] = cloth.faces[i].v1;
+		f[2] = cloth.faces[i].v2;
+
+		NR.Insert_Matrix(f[0], f[1], f[2], point_inline);
+		NR.Insert_Matrix(f[0], f[2], f[1], point_inline);
+		NR.Insert_Matrix(f[1], f[2], f[0], point_inline);
+	}
+
+	for (size_t i = 0; i < point_inline.size(); i++)
+	{
+		unsigned int fir = point_inline[i].first;
+		unsigned int sec = point_inline[i].second;
+
+		neigh2[fir].push_back(sec);
+		neigh2[sec].push_back(fir);
+	}
+	cout << "springs build successfully!" << endl;
+
+	const vector<unsigned int> cpu_neigh1 = pack_adjacency(neigh1, sim_parameter.NUM_PER_VERTEX_SPRING_STRUCT);
+	const vector<unsigned int> cpu_neigh2 = pack_adjacency(neigh2, sim_parameter.NUM_PER_VERTEX_SPRING_BEND);
+
+	// data() rather than &v[0]: valid even for a cloth without vertices.
+	safe_cuda(cudaMemcpy(adj_spring_st, cpu_neigh1.data(), cpu_neigh1.size() * sizeof(unsigned int), cudaMemcpyHostToDevice));
+	safe_cuda(cudaMemcpy(adj_spring_bd, cpu_neigh2.data(), cpu_neigh2.size() * sizeof(unsigned int), cudaMemcpyHostToDevice));
+	return true;
+}
+
+// Connects every boundary vertex to the nearest vertex of its cloth piece.
+// cloth.objects is read in pairs - cloth1, cloth1_boundary, cloth2,
+// cloth2_boundary, ... - where `.second` of each entry is that group's vertex
+// count and the groups' vertices follow each other in cloth.vertices.
+// Results are appended to cloth_boundary_springs as (boundary vertex, cloth
+// vertex). A trailing unpaired object is ignored.
+void SpringsBuilder::get_cloth_boundary_spring(const Cloth &cloth)
+{
+	// The kd-tree stores pointers into this table as node payload, so it has
+	// to outlive every tree built below.
+	vector<int> vertex_ids(cloth.vertices.size());
+
+	unsigned int g_start = 0;
+	for (size_t n = 0; n + 1 < cloth.objects.size(); n += 2)
+	{
+		const unsigned int piece_size = cloth.objects[n].second;
+		const unsigned int border_size = cloth.objects[n + 1].second;
+		kdtree *kd = kd_create(3);
+
+		// Index the cloth piece.
+		for (unsigned int i = 0; i < piece_size; i++)
+		{
+			const unsigned int v = i + g_start;
+			vertex_ids[v] = static_cast<int>(v);
+			kd_insert3f(kd, cloth.vertices[v].x,
+				cloth.vertices[v].y,
+				cloth.vertices[v].z,
+				&vertex_ids[v]);
+		}
+		g_start += piece_size;
+
+		// Look up the nearest cloth vertex for each vertex of the boundary.
+		for (unsigned int i = 0; i < border_size; i++)
+		{
+			const unsigned int v = i + g_start;
+			kdres *result = kd_nearest3f(kd, cloth.vertices[v].x,
+				cloth.vertices[v].y,
+				cloth.vertices[v].z);
+			if (result == NULL)
+				continue;   // no answer (e.g. the piece has no vertices)
+
+			float kdpos[3];
+			const int *resultidx = static_cast<int*>(kd_res_itemf(result, kdpos));
+			if (resultidx != NULL)
+			{
+				cloth_boundary_springs.push_back(
+					pair<unsigned int, unsigned int>(v, static_cast<unsigned int>(*resultidx)));
+			}
+			kd_res_free(result);   // every result set must be released by the caller
+		}
+		g_start += border_size;
+
+		kd_free(kd);
+	}
+}
+
+// Links neighbouring cloth pieces through their boundaries.
+//  1. Measures the average length of (up to) the first 100 cloth-to-boundary
+//     springs; the result is kept in `max_dist`.
+//  2. For every boundary vertex, finds the nearest vertex on any OTHER
+//     boundary and keeps the pair when their distance is positive and below
+//     50 * max_dist (appended to boundary_boundary_springs).
+//  3. Maps both ends of each kept pair to the cloth vertex they are attached
+//     to and stores the result in `boundary`.
+// Must run after get_cloth_boundary_spring(); does nothing when that step
+// produced no springs.
+void SpringsBuilder::get_boundary_boundary_spring(const Cloth &cloth)
+{
+	const size_t NUM = 100;
+	// Never read more springs than actually exist.
+	const size_t sample_count = (cloth_boundary_springs.size() < NUM) ? cloth_boundary_springs.size() : NUM;
+	if (sample_count == 0)
+		return;
+
+	float max_dist = 0;
+	for (size_t i = 0; i < sample_count; i++)
+	{
+		unsigned int idx1 = cloth_boundary_springs[i].first;
+		unsigned int idx2 = cloth_boundary_springs[i].second;
+		std::cout << idx1 << " " << idx2 << std::endl; 
+		max_dist += glm::distance(cloth.vertices[idx1], cloth.vertices[idx2]);
+	}
+	max_dist /= sample_count;
+	cout << "�߽�����Ƭ���ڵ�֮��������룺" << max_dist << endl;
+
+	// [first, last) vertex range of every boundary group; objects alternate
+	// cloth, boundary, cloth, boundary, ...
+	vector<pair<unsigned int, unsigned int>> start_end;
+	unsigned int start = 0;
+	for (size_t n = 0; n + 1 < cloth.objects.size(); n += 2)
+	{
+		start += cloth.objects[n].second;
+		start_end.push_back(make_pair(start, start + cloth.objects[n + 1].second));
+		start += cloth.objects[n + 1].second;
+	}
+
+	// Payload table for the kd-trees; must outlive them.
+	vector<int> vertex_ids(cloth.vertices.size());
+	for (size_t i = 0; i < start_end.size(); i++)
+	{
+		// Index every boundary except the i-th one.
+		kdtree *kd = kd_create(3);
+		for (size_t j = 0; j < start_end.size(); j++)
+		{
+			if (j == i) continue;
+			for (unsigned int k = start_end[j].first; k < start_end[j].second; k++)
+			{
+				vertex_ids[k] = static_cast<int>(k);
+				kd_insert3f(kd, cloth.vertices[k].x,
+					cloth.vertices[k].y,
+					cloth.vertices[k].z,
+					&vertex_ids[k]);
+			}
+		}
+
+		// Nearest foreign boundary vertex for each vertex of boundary i.
+		for (unsigned int k = start_end[i].first; k < start_end[i].second; k++)
+		{
+			kdres *result = kd_nearest3f(kd, cloth.vertices[k].x,
+				cloth.vertices[k].y,
+				cloth.vertices[k].z);
+			if (result == NULL)
+				continue;   // no answer (e.g. there is only one boundary)
+
+			float kdpos[3];
+			const int *resultidx = static_cast<int*>(kd_res_itemf(result, kdpos));
+			if (resultidx != NULL)
+			{
+				const unsigned int nearest = static_cast<unsigned int>(*resultidx);
+				const float gap = glm::distance(cloth.vertices[k], cloth.vertices[nearest]);
+				// Reject far-away matches and coincident points.
+				if (gap < max_dist * 50 && gap > 0)
+				{
+					boundary_boundary_springs.push_back(make_pair(k, nearest));
+				}
+			}
+			kd_res_free(result);   // every result set must be released by the caller
+		}
+
+		kd_free(kd);
+	}
+
+	//map[boundary,cloth]
+	map<unsigned int, unsigned int> map_spring;
+	for (auto spring : cloth_boundary_springs)
+		map_spring[spring.first] = spring.second;
+
+	for (auto spring : boundary_boundary_springs)
+	{
+		boundary.insert(make_pair(map_spring[spring.first], map_spring[spring.second]));
+	}
+}
+
+#if 0
+// Disabled (#if 0) debug view of the spring network: one line segment per
+// adjacency entry, neigh1 in white and neigh2 in red.
+// NOTE(review): `Obj` is not declared anywhere in view and the declaration in
+// SpringsBuilder.h is commented out - this needs porting before re-enabling.
+void SpringsBuilder::draw(Obj &cloth)
+{
+	// Structural springs (white).
+	for (int i = 0; i < neigh1.size(); i++)
+	{
+		glm::vec4 v1 = cloth.vertices[i];
+		for (int j = 0; j < neigh1[i].size(); j++)
+		{
+			glm::vec4 v2 = cloth.vertices[neigh1[i][j]];
+
+			glBegin(GL_LINES);
+			glColor3f(1.0, 1.0, 1.0);
+			glVertex3f(v1.x, v1.y, v1.z);
+			glVertex3f(v2.x, v2.y, v2.z);
+			glEnd();
+		}
+	}
+
+	// Bend springs (red).
+	for (int i = 0; i < neigh2.size(); i++)
+	{
+		glm::vec4 v1 = cloth.vertices[i];
+		for (int j = 0; j < neigh2[i].size(); j++)
+		{
+			glm::vec4 v2 = cloth.vertices[neigh2[i][j]];
+			glBegin(GL_LINES);
+			glColor3f(1.0, 0, 0);
+			glVertex3f(v1.x, v1.y, v1.z);
+			glVertex3f(v2.x, v2.y, v2.z);
+			glEnd();
+		}
+	}
+}
+
+#endif
\ No newline at end of file
diff --git a/SpringsBuilder.h b/SpringsBuilder.h
new file mode 100644
index 0000000..69c93fe
--- /dev/null
+++ b/SpringsBuilder.h
@@ -0,0 +1,30 @@
+#pragma once
+#include <set>
+#include <vector>
+#include <map>
+
+#include "parameter.h"
+
+class Cloth; 
+
+// Builds the spring adjacency of a cloth on the host and uploads it to the GPU.
+// (allanyu: the code in this file has not been cleaned up yet - presumably the
+// meaning of the original note; confirm.)
+class SpringsBuilder
+{
+public:
+	// allanyu: revisit later
+	// void draw(Obj &cloth);
+
+	// Fills adj_spring_st / adj_spring_bd (device pointers) with the
+	// structural and bend spring tables of `cloth`.
+	bool build(const Cloth &cloth, unsigned int *adj_spring_st, unsigned int *adj_spring_bd);
+private:
+	std::vector<std::pair<unsigned int,unsigned int>> cloth_boundary_springs;   // (boundary vertex, nearest cloth vertex); one direction only, i.e. just pair(1,2)
+	std::vector<std::pair<unsigned int,unsigned int>> boundary_boundary_springs;   // pairs between different boundaries; presumably holds both pair(1,2) and pair(2,1)
+	std::set<std::pair<unsigned int,unsigned int>> boundary;   // boundary_boundary_springs with both ends mapped to their cloth vertices
+	std::vector<std::vector<unsigned int>> neigh1;   // per vertex: directly connected vertices (indices), i.e. structure springs
+	std::vector<std::vector<unsigned int>> neigh2;   // per vertex: vertices opposite a shared edge (indices), i.e. bend springs
+
+private:
+	//void ad spring(float stiffness,Vec4s& vertices,unsigned int p1,unsigned int p2);
+	// True when `val` is contained in `array`.
+	bool exist(const std::vector<unsigned int>& array, const unsigned int val);
+	// Fill cloth_boundary_springs, then boundary_boundary_springs / boundary.
+	void get_cloth_boundary_spring(const Cloth &cloth);
+	void get_boundary_boundary_spring(const Cloth &cloth);
+};
\ No newline at end of file
diff --git a/Utilities.cpp b/Utilities.cpp
new file mode 100644
index 0000000..a14a5b3
--- /dev/null
+++ b/Utilities.cpp
@@ -0,0 +1,36 @@
+
+#include <time.h>
+#include <iostream>
+#include <string>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+
+#include "Utilities.h"
+
+using namespace std;
+ 
+// State of the FPS counter (times are GLUT_ELAPSED_TIME readings).
+float startTime = 0;          // start of the current measuring window
+int totalFrames = 0;          // frames counted in the current window
+float currentTime = 0;        // time of the most recent getFPS() call
+char info[MAX_PATH] = { 0 };  // window title text
+
+// Call once per frame. Counts frames and, whenever more than 1000 time units
+// have passed since the window started, reformats the title text with the
+// measured frame rate. The window title is set on every call.
+void getFPS()
+{
+	const float now = static_cast<float>(glutGet(GLUT_ELAPSED_TIME));
+	currentTime = now;
+	++totalFrames;
+
+	const float window_length = now - startTime;
+	if (window_length > 1000)
+	{
+		const float fps = (totalFrames / window_length) * 1000;
+		startTime = now;
+		totalFrames = 0;
+
+		sprintf_s(info, "GLUT Cloth Demo FPS: %4.3f", fps);
+	}
+	glutSetWindowTitle(info);
+}
\ No newline at end of file
diff --git a/Utilities.h b/Utilities.h
new file mode 100644
index 0000000..228a958
--- /dev/null
+++ b/Utilities.h
@@ -0,0 +1,206 @@
+#pragma once
+
+#include <windows.h>
+
+#include <vector>
+#include <iostream>
+#include <algorithm>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+#include <cuda_runtime.h>
+#include <cuda_gl_interop.h>
+
+#include "Common.h"
+
+/*check error code of cudaMalloc and print out if needed*/
+#define safe_cuda(CODE)\
+ {\
+  cudaError_t err = CODE;\
+  if(err != cudaSuccess) {\
+    std::cout<<"CUDA error:"<<cudaGetErrorString(err)<<std::endl;\
+ }\
+}
+
+
+// Comparator that orders indices by the key they refer to:
+// lhs comes before rhs when keys[lhs] < keys[rhs].
+// Holds a reference to `keys`, so the vector must outlive the comparator.
+template<class Key>
+class SortAssistant
+{
+public:
+	SortAssistant(const std::vector<Key> &keys) : _keys(keys) { }
+
+	// const: comparing never modifies the comparator, and algorithms may
+	// invoke it through a const object.
+	bool operator()(const unsigned int &lhs, const unsigned int &rhs) const
+	{
+		return _keys[lhs] < _keys[rhs];
+	}
+private:
+	const std::vector<Key> &_keys;
+};
+
+// Fills `indices` with 0 .. keys.size()-1 ordered so that keys[indices[k]] is
+// ascending; `keys` itself is not modified. Works with any container that
+// offers size() and operator[] (the comparison reads `keys` directly instead
+// of going through a std::vector-only helper).
+template<class Containers>
+void indices_sort(const Containers &keys, std::vector<unsigned int> &indices)
+{
+	indices.resize(keys.size());
+	for (size_t i = 0; i < indices.size(); ++i)
+	{
+		indices[i] = static_cast<unsigned int>(i);
+	}
+	std::sort(indices.begin(), indices.end(),
+		[&keys](const unsigned int lhs, const unsigned int rhs) { return keys[lhs] < keys[rhs]; });
+
+}
+
+// Collapses runs of consecutive indices that refer to equal keys, keeping the
+// first index of each run. `indices` is shrunk in place; every entry must be
+// a valid position in `keys` (the caller is responsible for that).
+template<class Containers>
+void remove_redundant(const Containers &keys, std::vector<unsigned int> &indices)
+{
+	const std::vector<unsigned int>::iterator new_end = std::unique(
+		indices.begin(), indices.end(),
+		[&keys](const unsigned int kept, const unsigned int candidate)
+		{
+			// Same test as before: a candidate is dropped unless its key
+			// differs from the key of the last index that was kept.
+			return !(keys[candidate] != keys[kept]);
+		});
+	indices.erase(new_end, indices.end());
+}
+
+// Gathers input[indices[k]] into ouput[k] for every k; ouput is resized to
+// indices.size(). Every index must be a valid position in `input` (the caller
+// is responsible for that). `input` and `ouput` may be the same object.
+template<class Containers>
+void filter(const Containers &input, const std::vector<unsigned int> &indices, Containers &ouput)
+{
+	// Filtering in place: resizing ouput would destroy elements that still
+	// have to be read, so gather from a snapshot instead.
+	if (&input == &ouput)
+	{
+		const Containers snapshot(input);
+		filter(snapshot, indices, ouput);
+		return;
+	}
+
+	ouput.resize(indices.size()); 
+	for (size_t i = 0; i < indices.size(); ++i)
+	{
+		ouput[i] = input[indices[i]];
+	}
+}
+
+
+/**
+ *Allocates `size` bytes on the GPU and copies `size` bytes from host memory
+ *`src` into them. The device pointer is returned through `*dst`; the caller
+ *releases it with cudaFree(). When the allocation fails the error is printed,
+ *`*dst` is set to NULL and nothing is copied.
+*/
+inline void copyFromCPUtoGPU(void* *dst, void *src, int size)
+{
+	const cudaError_t alloc_status = cudaMalloc(dst, size);
+	if (alloc_status != cudaSuccess)
+	{
+		std::cout << "CUDA error:" << cudaGetErrorString(alloc_status) << std::endl;
+		*dst = NULL;
+		return;
+	}
+	safe_cuda(cudaMemcpy(*dst, src, size, cudaMemcpyHostToDevice));
+}
+
+/**
+ *Allocates `size` bytes with malloc() and copies `size` bytes from device
+ *memory `src` into them. The host pointer is returned through `*dst`; the
+ *caller releases it with free(). When the allocation fails `*dst` is NULL and
+ *nothing is copied.
+*/
+inline void copyFromGPUtoCPU(void* *dst, void *src, int size)
+{
+	*dst = malloc(size);
+	if (*dst == NULL)
+	{
+		std::cout << "copyFromGPUtoCPU: host allocation failed" << std::endl;
+		return;
+	}
+	safe_cuda(cudaMemcpy(*dst, src, size, cudaMemcpyDeviceToHost));
+}
+
+
+// Intrusive shared reference count for handle-like classes.
+// Copies of an object share one heap-allocated counter. A derived class calls
+// ref_release() (typically from its destructor) and implements
+// ref_auto_clean(), which runs exactly once - when the last sharer releases.
+// The counter is a plain int: no thread safety is provided.
+class RefObject
+{
+public:
+	RefObject()
+	{
+		ref_create();
+	}
+	RefObject(const RefObject &rhs) : _reference(rhs._reference)
+	{
+		// rhs may already have been released; then there is nothing to share.
+		if (_reference != NULL)
+		{
+			++(*_reference);
+		}
+	}
+	RefObject &operator=(const RefObject &rhs)
+	{
+		// Already sharing the same counter (this includes self-assignment):
+		// releasing first could destroy the counter that is about to be
+		// re-acquired, so there is nothing to do.
+		if (_reference != rhs._reference)
+		{
+			ref_release();
+
+			_reference = rhs._reference;
+			if (_reference != NULL)
+			{
+				++(*_reference);
+			}
+		}
+
+		return *this;
+	}
+
+protected:
+	// Frees the resources shared by all copies. Invoked by ref_release() when
+	// the count drops to zero.
+	virtual void ref_auto_clean() = 0;
+
+	// Starts a fresh count of one.
+	void ref_create()
+	{
+		_reference = new int;
+		*_reference = 1;
+	}
+
+	// Gives up this object's share. Safe to call again afterwards: an object
+	// that holds no counter is left untouched.
+	void ref_release()
+	{
+		if (_reference == NULL)
+		{
+			return;
+		}
+
+		if (--(*_reference) == 0)
+		{
+			ref_auto_clean();
+			delete _reference;
+		}
+
+		_reference = NULL;
+	}
+
+	// Detaches from the current sharers and starts a new count of one.
+	void ref_renew()
+	{
+		ref_release();
+		ref_create(); 
+	}
+
+private:
+	int *_reference;
+};
+
+
+// Accumulating stopwatch based on the Windows performance counter.
+// Each start()/stop() pair adds its duration (in microseconds) to the total;
+// restart() clears the total and starts a new interval.
+class StopWatch
+{
+public:
+	StopWatch() : _elapsed(0)
+	{
+		QueryPerformanceFrequency(&_freq);
+		// Defined value in case stop() is called before start().
+		_start.QuadPart = 0;
+	}
+	~StopWatch() { }
+public:
+	void start()
+	{
+		QueryPerformanceCounter(&_start);
+	}
+	void stop()
+	{
+		LARGE_INTEGER end;
+		QueryPerformanceCounter(&end);
+		// ticks * 1e6 / (ticks per second) = microseconds
+		_elapsed += (end.QuadPart - _start.QuadPart) * 1000000 / _freq.QuadPart;
+	}
+	void restart()
+	{
+		_elapsed = 0;
+		start();
+	}
+	// Accumulated time in microseconds.
+	double elapsed() const
+	{
+		return double(_elapsed);
+	}
+	// Accumulated time in milliseconds.
+	double elapsed_ms() const
+	{
+		return double(_elapsed) / 1000.0;
+	}
+	// Accumulated time in seconds.
+	double elapsed_second() const
+	{
+		return double(_elapsed) / 1000000.0;
+	}
+
+private:
+	LARGE_INTEGER _freq;    // counter ticks per second
+	LARGE_INTEGER _start;   // counter value at the last start()
+	sint64 _elapsed;        // accumulated microseconds
+};
+
+// Per-frame FPS counter that also updates the GLUT window title (defined in Utilities.cpp).
+void getFPS();
diff --git a/VAOMesh.h b/VAOMesh.h
new file mode 100644
index 0000000..c29639c
--- /dev/null
+++ b/VAOMesh.h
@@ -0,0 +1,22 @@
+#pragma once
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+
+#include "Mesh.h"
+
+// OpenGL object names belonging to one renderable mesh.
+// NOTE(review): field meanings below are read off the names, except for
+// array_buffer, which Simulator::update_cloth() registers with CUDA - confirm
+// the rest where the buffers are created.
+struct VAO_Buffer
+{
+	GLuint vao;            // presumably the vertex array object
+	GLuint array_buffer;   // buffer object registered with CUDA (cudaGraphicsGLRegisterBuffer)
+	GLuint index_buffer;   // presumably the element/index buffer
+	GLuint texture;        // presumably the texture object
+	GLuint index_size;     // presumably the number of indices to draw
+};
+
+// A Mesh together with the OpenGL objects used to render it.
+class VAOMesh : public Mesh
+{
+public:
+	VAO_Buffer vbo;   // GL object names for this mesh
+};
+
+
diff --git a/Verlet.h b/Verlet.h
new file mode 100644
index 0000000..1efaa1b
--- /dev/null
+++ b/Verlet.h
@@ -0,0 +1,40 @@
+#pragma once
+#include <cuda.h>
+#include <device_functions.h>
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+
+#include <glm/glm.hpp>
+
+class BRTreeNode;
+class Primitive;
+
+// CUDA kernels of the cloth simulation. Only the prototypes are visible here;
+// parameter descriptions follow the arguments Simulator passes in.
+// NOTE(review): BVHAccel is used by value below but this header neither
+// declares nor includes it - it relies on the includer pulling in
+// bvh\BVHAccel.h first.
+
+//update cloth face normal
+// num_faces faces; x_cur_in: current positions; adj_face_to_vertices: three
+// vertex indices per face; dir_face_normals: output, one normal per face.
+__global__ void get_face_normal(
+	const unsigned int num_faces,
+	glm::vec3 *x_cur_in,
+	unsigned int *adj_face_to_vertices,
+	glm::vec3 *dir_face_normals);
+
+// Writes per-vertex data into the mapped OpenGL buffer (vbo_vertices /
+// vbo_normals) from positions x, the vertex->face adjacency table and the
+// face normals.
+__global__ void show_vbo(
+	const unsigned int num_vertices,
+	glm::vec4 *vbo_vertices, glm::vec3 *vbo_normals,
+	glm::vec3 *x, unsigned int *adj_vertex_to_faces, glm::vec3 *dir_face_normals);
+
+// Collision pass against `tree`; reads the *_in buffers and writes the *_out
+// buffers (Simulator swaps them after the launch).
+__global__ void CCD(
+	const BVHAccel tree,
+	const unsigned int num_vertices,
+	glm::vec3 *x_cur_in, glm::vec3 *x_lst_in, glm::vec3 *x_cur_out, glm::vec3 *x_lst_out, glm::vec3 *x_orignal,
+	glm::vec3 *dir_collision_force);
+
+// verlet intergration
+// Same buffer convention as CCD, plus the structural (adj_spring_st) and bend
+// (adj_spring_bd) spring tables. With DEBUG_COLLISION an extra per-vertex int
+// array is passed for collision flags.
+__global__ void verlet(
+	const BVHAccel tree,
+	const unsigned int num_vertices,
+	glm::vec3 *x_cur_in, glm::vec3 *x_lst_in, glm::vec3 *x_cur_out, glm::vec3 *x_lst_out, glm::vec3 *x_orignal,
+	unsigned int *adj_spring_st, unsigned int *adj_spring_bd,
+	glm::vec3 *dir_collision_force
+#ifdef DEBUG_COLLISION
+	, int *collided_vertex
+#endif
+);
diff --git a/bvh/BRTreeNode.h b/bvh/BRTreeNode.h
new file mode 100644
index 0000000..05f9c07
--- /dev/null
+++ b/bvh/BRTreeNode.h
@@ -0,0 +1,96 @@
+#pragma once
+
+#include <vector>
+
+#include <cuda_runtime.h>
+
+#include "BBox.h"
+
+/**
+ * BRTreeNode
+ *
+ * One node of the binary radix tree that backs the BVH.
+ *
+ * _lft, _rht and _parent are plain offsets into the single node array that
+ * holds the whole tree. -1 (the constructor default) means "no such node":
+ * BVHAccel::get_left_child()/get_right_child() return NULL for any negative
+ * value. BVHBuilder's kernels store the leaf nodes in the same array, behind
+ * the internal nodes.
+ *
+ * _pid is the primitive index a leaf refers to. It stays -1 on internal
+ * nodes, which is exactly what leaf() tests.
+ *
+ * NOTE: an earlier version of this comment described a signed encoding
+ * (positive value = internal node at val-1, negative value = leaf node at
+ * -(val+1), in two separate arrays). Nothing in this class decodes values
+ * that way; the description above is what the accessors actually do.
+ */
+
+// Allan Yu: not optimized yet; concerns the functions involved here
+// (the remainder of the original note was lost to a text-encoding error).
+class BRTreeNode
+{
+public:
+	// Creates an unlinked node: empty bounding box, no children, no parent,
+	// no primitive. Initializers are listed in member declaration order,
+	// which is the order the compiler runs them in anyway.
+	BRTreeNode() : _bbox(), _lft(-1), _rht(-1), _parent(-1), _pid(-1) { }
+
+	// Raw link values; negative means "none".
+	__host__ __device__ int lft() const { return _lft; }
+	__host__ __device__ int rht() const { return _rht; }
+	__host__ __device__ int parent() const { return _parent; }
+	// Primitive index for a leaf, -1 otherwise.
+	__host__ __device__ int pid() const { return _pid; }
+
+	// Bounding box currently stored in this node.
+	__host__ __device__ const BBox &bbox() const
+	{
+		return _bbox;
+	}
+
+	// Forwards the point test to the node's bounding box.
+	__host__ __device__ bool intersect(const glm::vec3 &point) const
+	{
+		return _bbox.intersect(point);
+	}
+
+	// A node counts as a leaf once a primitive index has been assigned.
+	__host__ __device__ bool leaf() const
+	{
+		return _pid >= 0;
+	}
+
+	__host__ __device__ void set_lft(int l) { _lft = l; }
+	__host__ __device__ void set_rht(int r) { _rht = r; }
+	__host__ __device__ void set_parent(int p) { _parent = p; }
+	// Assigns the primitive index; a non-negative value turns the node into a leaf.
+	__host__ __device__ void set_idx(int index) { _pid = index; }
+	// Replaces the stored bounding box.
+	__host__ __device__ void set_bbox(const BBox &bbox)
+	{
+		_bbox = bbox;
+	}
+
+	// Grows the stored bounding box so that it also encloses `bbox`.
+	__host__ __device__ void expand(const BBox &bbox)
+	{
+		_bbox.expand(bbox);
+	}
+
+	// Debug dump of the link values and the primitive index (host only).
+	// Const because it only reads the node.
+	__host__ void printInfo() const
+	{
+		printf("-----\n");
+		printf("childA:(%d)\n", lft());
+		printf("childB:(%d)\n", rht());
+		printf("parent:(%d)\n", parent());
+		printf("index:%d\n", pid());
+	}
+
+private:
+	BBox _bbox;
+
+	int _lft;
+	int _rht;
+	int _parent;
+	int _pid;
+
+	friend class BVHBuilder;
+};
+
+// Host-side container of tree nodes.
+typedef std::vector<BRTreeNode> BRTreeNodes;
+
+
diff --git a/bvh/BVHAccel.cpp b/bvh/BVHAccel.cpp
new file mode 100644
index 0000000..d375a86
--- /dev/null
+++ b/bvh/BVHAccel.cpp
@@ -0,0 +1,108 @@
+#include <iostream>
+#include <algorithm>
+#include <bitset>
+
+#include <cuda.h>
+#include <device_functions.h>
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+
+#include <thrust/sort.h>
+#include <thrust/execution_policy.h>
+
+#include "../Utilities.h"
+#include "BVHAccel.h"
+#include "Primitive.h"
+#include "BVHBuilder.h"
+
+using namespace std;
+
+///////////////////////////////////////////////////////////
+// Creates an empty accelerator: zero counts and null device pointers.
+// Device memory is only allocated later by resize().
+// The initializers follow the member declaration order in BVHAccel.h
+// (_primitives_cur before _primitives_lst), so what is written here is the
+// order that actually runs.
+BVHAccel::BVHAccel() :
+	_number_faces(0),
+	_number_vertices(0),
+	_vertices_lst(NULL),
+	_vertices_cur(NULL),
+	_primitives_cur(NULL),
+	_primitives_lst(NULL),
+	_tree_nodes(NULL)
+{
+}
+
+// Makes sure the device buffers can hold `number_vertices` vertices and
+// `number_faces` primitives.
+//
+// Nothing happens when both requested counts already fit. Otherwise
+// ref_renew() is called (it comes from RefObject, which is not visible here -
+// presumably it releases the current buffers through ref_auto_clean(); TODO
+// confirm) and every buffer is allocated again for the new counts.
+//
+// A tree over N leaves has N - 1 internal nodes, so the node array holds
+// 2 * N - 1 BRTreeNode entries.
+void BVHAccel::resize(unsigned int number_vertices, unsigned int number_faces)
+{
+	if (number_vertices <= _number_vertices && number_faces <= _number_faces)
+	{
+		return;
+	}
+	ref_renew();
+
+	_number_faces = number_faces;
+	_number_vertices = number_vertices;
+
+	safe_cuda(cudaMalloc(&_vertices_lst, sizeof(glm::vec3) * number_vertices));
+	safe_cuda(cudaMalloc(&_vertices_cur, sizeof(glm::vec3) * number_vertices));
+
+	safe_cuda(cudaMalloc(&_primitives_cur, sizeof(Primitive) * number_faces));
+	safe_cuda(cudaMalloc(&_primitives_lst, sizeof(Primitive) * number_faces));
+
+	// The "- 1" belongs to the node count. The previous expression
+	// `sizeof(BRTreeNode) * number_faces * 2 - 1` subtracted one BYTE from the
+	// size of 2 * N nodes instead of one node.
+	const size_t node_count = static_cast<size_t>(number_faces) * 2 - 1;
+	safe_cuda(cudaMalloc(&_tree_nodes, sizeof(BRTreeNode) * node_count));
+}
+
+// Releases every device buffer owned by the accelerator, in the same order
+// as before: both vertex snapshots, both primitive arrays, then the nodes.
+// The pointers themselves are left unchanged.
+void BVHAccel::ref_auto_clean()
+{
+	void *device_buffers[] = {
+		_vertices_lst,
+		_vertices_cur,
+		_primitives_cur,
+		_primitives_lst,
+		_tree_nodes
+	};
+	const unsigned int buffer_count = sizeof(device_buffers) / sizeof(device_buffers[0]);
+
+	for (unsigned int b = 0; b < buffer_count; ++b)
+	{
+		cudaFree(device_buffers[b]);
+	}
+
+#if 0
+	free(h_leaf_nodes);
+	free(h_internal_nodes);
+#endif
+}
+
+
+// Disabled debug helpers (compiled out by `#if 0`).
+// They use names that exist in BVHAccel.h only inside its own `#if 0`
+// section (h_internal_nodes, h_leaf_nodes) or not at all (_internal_nodes,
+// _leaf_nodes, num_internal_node, is_leaf), and they read `root->bbox` as a
+// public field although BRTreeNode exposes its box through bbox(). The code
+// therefore needs porting before it can be re-enabled.
+#if 0
+// Collects leaves whose box is inverted on x (min.x > max.x). Note that the
+// recursion only descends while the current node itself is inverted.
+void BVHAccel::access(BRTreeNode *root, vector<BRTreeNode*> &bad_bode)
+{
+	if (root->bbox.min.x > root->bbox.max.x)
+	{
+		if (is_leaf(root))
+		{
+			bad_bode.push_back(root);
+			return;
+		}
+		else
+		{
+			access(get_left_child(root), bad_bode);
+			access(get_right_child(root), bad_bode);
+		}
+	}
+}
+
+// Copies the internal and leaf node arrays from the GPU into host buffers.
+void BVHAccel::pre_drawoutline()
+{
+	copyFromGPUtoCPU((void**)&h_internal_nodes, _internal_nodes, sizeof(BRTreeNode) * num_internal_node);
+	copyFromGPUtoCPU((void**)&h_leaf_nodes, _leaf_nodes, sizeof(BRTreeNode) * num_leaf_node);
+}
+
+// Draws the box of `root` and then, recursively, of everything below it.
+void BVHAccel::draw(BRTreeNode *root)
+{
+	root->bbox.draw();
+	if (is_leaf(root))
+	{
+		return;
+	}
+	else
+	{
+		draw(get_left_child(root));
+		draw(get_right_child(root));
+	}
+}
+#endif
+
+
diff --git a/bvh/BVHAccel.h b/bvh/BVHAccel.h
new file mode 100644
index 0000000..76c50c4
--- /dev/null
+++ b/bvh/BVHAccel.h
@@ -0,0 +1,320 @@
+#pragma once
+#include <vector> 
+
+#include "../Utilities.h"
+#include "BBox.h"
+#include "Primitive.h"
+#include "BRTreeNode.h"
+
+/**
+ * Bounding Volume Hierarchy for fast point-vs-object intersection tests.
+ * Note that the BVHAccel is an Aggregate (a Primitive itself) that contains
+ * all the _primitives_cur it was built from. Therefore once a BVHAccel
+ * Aggregate is created, the original input _primitives_cur can be ignored
+ * from the scene during point-object intersection tests, as they are
+ * contained in the aggregate.
+ *
+ * All pointer members are allocated with cudaMalloc in resize(), so the
+ * query functions only work where that memory can be dereferenced.
+ * BVHBuilder (a friend) fills the buffers and calls resize().
+ */
+class BVHAccel : public RefObject
+{
+public:
+	BVHAccel();
+	~BVHAccel() { ref_release(); }
+
+private:
+	// Frees the device buffers; see BVHAccel.cpp.
+	virtual void ref_auto_clean();
+
+	// Grows the device buffers when the requested counts do not fit.
+	void resize(unsigned int number_vertices, unsigned int number_faces);
+
+public:
+	// First entry of the node array, or NULL before any allocation.
+	__host__ __device__ BRTreeNode *get_root() const
+	{
+		return _tree_nodes;
+	}
+
+	// Left child of `node`, or NULL when its link is negative.
+	__host__ __device__ BRTreeNode *get_left_child(BRTreeNode *node) const
+	{
+		int idx = node->lft();
+		return idx < 0 ? NULL : _tree_nodes + idx;
+	}
+
+	// Right child of `node`, or NULL when its link is negative.
+	__host__ __device__ BRTreeNode *get_right_child(BRTreeNode *node) const
+	{
+		int idx = node->rht();
+		return idx < 0 ? NULL : _tree_nodes + idx;
+	}
+
+	/**
+	 * Reports the first leaf whose bounding box contains `point`.
+	 * \param point query position
+	 * \param idx   receives the primitive index of that leaf; untouched
+	 *              when the function returns false
+	 * \return true when such a leaf was found
+	 */
+	__host__ __device__ bool intersect(const glm::vec3 point, int &idx) const
+	{
+		BRTreeNode *node = get_root();
+		if (node == NULL)
+		{
+			return false; // nothing allocated yet
+		}
+		if (node->leaf())
+		{
+			// Single-primitive tree: the root has no children, so the loop
+			// below (which always looks at children) must not run.
+			if (!node->intersect(point))
+			{
+				return false;
+			}
+			idx = node->pid();
+			return true;
+		}
+
+		// Allocate traversal stack from thread-local memory,
+		// and push NULL to indicate that there are no postponed nodes.
+		BRTreeNode *stack[64];
+		BRTreeNode **stackPtr = stack;
+		*stackPtr++ = NULL; // push
+
+		// Traverse nodes starting from the root.
+		do
+		{
+			// Check each child node for overlap.
+			BRTreeNode *child_lft = get_left_child(node);
+			BRTreeNode *child_rht = get_right_child(node);
+			bool overlap_lft = child_lft->intersect(point);
+			bool overlap_rht = child_rht->intersect(point);
+			bool leaf_lft = child_lft->leaf();
+			bool leaf_rht = child_rht->leaf();
+
+			// Query overlaps a leaf node => report collision with the first collision.
+			if (overlap_lft && leaf_lft)
+			{
+				idx = child_lft->pid();       //is a leaf, and we can get it through primitive[idx]
+				return true;
+			}
+
+			if (overlap_rht && leaf_rht)
+			{
+				idx = child_rht->pid();
+				return true;
+			}
+
+			// Query overlaps an internal node => traverse.
+			bool traverse_lft = (overlap_lft && !leaf_lft);
+			bool traverse_rht = (overlap_rht && !leaf_rht);
+
+			if (!traverse_lft && !traverse_rht)
+			{
+				node = *--stackPtr; // pop
+			}
+			else
+			{
+				node = (traverse_lft) ? child_lft : child_rht;
+				if (traverse_lft && traverse_rht)
+					*stackPtr++ = child_rht; // push
+			}
+		} while (node != NULL);
+
+		return false;
+	}
+
+	/**
+	 * Like intersect(), but a leaf only counts when the point lies on the
+	 * negative side of the current primitive and on the positive side of
+	 * the previous one (distance_to() < 0 now and > 0 before).
+	 * NOTE(review): distance_to() is declared in Primitive.h, presumably a
+	 * signed distance to the triangle plane - confirm.
+	 * \param idx receives the primitive index of the last leaf that was
+	 *            examined, also when the function returns false
+	 */
+	__host__ __device__ bool coplanarIntersect(const glm::vec3 point, int &idx) const
+	{
+		BRTreeNode *node = get_root();
+		if (node == NULL)
+		{
+			return false; // nothing allocated yet
+		}
+		if (node->leaf())
+		{
+			// Single-primitive tree: test the root directly, it has no children.
+			if (!node->intersect(point))
+			{
+				return false;
+			}
+			idx = node->pid();
+			float root_cur = _primitives_cur[idx].distance_to(point);
+			float root_lst = _primitives_lst[idx].distance_to(point);
+			return root_cur < 0 && root_lst > 0;
+		}
+
+		// Allocate traversal stack from thread-local memory,
+		// and push NULL to indicate that there are no postponed nodes.
+		BRTreeNode *stack[64];
+		BRTreeNode **stackPtr = stack;
+		*stackPtr++ = NULL; // push
+
+		// Traverse nodes starting from the root.
+		do
+		{
+			// Check each child node for overlap.
+			float dist_lst, dist_cur;
+			BRTreeNode *child_lft = get_left_child(node);
+			BRTreeNode *child_rht = get_right_child(node);
+			bool overlap_lft = child_lft->intersect(point);
+			bool overlap_rht = child_rht->intersect(point);
+			bool leaf_lft = child_lft->leaf();
+			bool leaf_rht = child_rht->leaf();
+
+			// Query overlaps a leaf node => report collision with the first collision.
+			if (overlap_lft && leaf_lft)
+			{
+				idx = child_lft->pid();       //is a leaf, and we can get it through primitive[idx]
+				if (overlap_rht && leaf_rht)
+				{
+					// both leaves contain the point: prefer the one whose box centre is closer
+					idx = (glm::distance(point, child_lft->bbox().centroid()) > glm::distance(point, child_rht->bbox().centroid())) ? child_rht->pid() : child_lft->pid();
+				}
+				dist_cur = _primitives_cur[idx].distance_to(point);
+				dist_lst = _primitives_lst[idx].distance_to(point);
+				if (dist_cur < 0 && dist_lst > 0)	return true;
+			}
+
+			if (overlap_rht && leaf_rht)
+			{
+				idx = child_rht->pid();
+				dist_cur = _primitives_cur[idx].distance_to(point);
+				dist_lst = _primitives_lst[idx].distance_to(point);
+				if (dist_cur < 0 && dist_lst > 0)	return true;
+			}
+
+			// Query overlaps an internal node => traverse.
+			bool traverse_lft = (overlap_lft && !leaf_lft);
+			bool traverse_rht = (overlap_rht && !leaf_rht);
+
+			if (!traverse_lft && !traverse_rht)
+				node = *--stackPtr; // pop
+			else
+			{
+				if (traverse_lft && traverse_rht)
+				{
+					// descend into the closer child first, postpone the other one
+					node = (glm::distance(point, child_lft->bbox().centroid()) > glm::distance(point, child_rht->bbox().centroid())) ? child_rht : child_lft;
+					*stackPtr++ = (node == child_lft) ? child_rht : child_lft;// push
+				}
+
+				else node = (traverse_lft) ? child_lft : child_rht;
+			}
+		} while (node != NULL);
+
+		return false;
+	}
+
+	/**
+	 * Like intersect(), but when both children are leaves containing the
+	 * point, the one whose box centre is closer to the point is reported,
+	 * and the closer internal child is visited first.
+	 * \param idx receives the primitive index; untouched on a false return
+	 */
+	__host__ __device__ bool nearestIntersect(const glm::vec3 point, int &idx) const
+	{
+		BRTreeNode *node = get_root();
+		if (node == NULL)
+		{
+			return false; // nothing allocated yet
+		}
+		if (node->leaf())
+		{
+			// Single-primitive tree: test the root directly, it has no children.
+			if (!node->intersect(point))
+			{
+				return false;
+			}
+			idx = node->pid();
+			return true;
+		}
+
+		// Allocate traversal stack from thread-local memory,
+		// and push NULL to indicate that there are no postponed nodes.
+		BRTreeNode *stack[64];
+		BRTreeNode **stackPtr = stack;
+		*stackPtr++ = NULL; // push
+
+		// Traverse nodes starting from the root.
+		do
+		{
+			// Check each child node for overlap.
+			BRTreeNode *child_lft = get_left_child(node);
+			BRTreeNode *child_rht = get_right_child(node);
+			bool overlap_lft = child_lft->intersect(point);
+			// The right-hand test has to use the right child; it used to
+			// repeat the left child, so the right subtree was judged by the
+			// left child's box.
+			bool overlap_rht = child_rht->intersect(point);
+			bool leaf_lft = child_lft->leaf();
+			bool leaf_rht = child_rht->leaf();
+
+			// Query overlaps a leaf node => report collision with the first collision.
+			if (overlap_lft && leaf_lft)
+			{
+				if (overlap_rht && leaf_rht)
+				{
+					idx = (glm::distance(point, child_lft->bbox().centroid()) > glm::distance(point, child_rht->bbox().centroid())) ? child_rht->pid() : child_lft->pid();
+					return true;
+				}
+				idx = child_lft->pid();       //is a leaf, and we can get it through primitive[idx]
+				return true;
+			}
+
+			if (overlap_rht && leaf_rht)
+			{
+				idx = child_rht->pid();
+				return true;
+			}
+
+			// Query overlaps an internal node => traverse.
+			bool traverse_lft = (overlap_lft && !leaf_lft);
+			bool traverse_rht = (overlap_rht && !leaf_rht);
+
+			if (!traverse_lft && !traverse_rht)
+				node = *--stackPtr; // pop
+			else
+			{
+				if (traverse_lft && traverse_rht)
+				{
+					// descend into the closer child first, postpone the other one
+					node = (glm::distance(point, child_lft->bbox().centroid()) > glm::distance(point, child_rht->bbox().centroid())) ? child_rht : child_lft;
+					*stackPtr++ = (node == child_lft) ? child_rht : child_lft;// push
+				}
+
+				else node = (traverse_lft) ? child_lft : child_rht;
+
+			}
+		} while (node != NULL);
+
+		return false;
+	}
+
+	// Primitive `idx` of the current snapshot (no bounds check).
+	__host__ __device__  const Primitive &curpri(unsigned int idx) const
+	{
+		return _primitives_cur[idx];
+	}
+
+	// Primitive `idx` of the previous snapshot (no bounds check).
+	__host__ __device__ const Primitive &lstpri(unsigned int idx) const
+	{
+		return _primitives_lst[idx];
+	}
+
+private:
+	// Capacities the buffers below were allocated for.
+	unsigned int _number_faces;
+	unsigned int _number_vertices;
+
+	// Vertex positions: previous ("lst") and current ("cur") snapshot.
+	glm::vec3 *_vertices_lst;
+	glm::vec3 *_vertices_cur;
+
+	// Primitives for the current and the previous snapshot.
+	Primitive *_primitives_cur;
+	Primitive *_primitives_lst;
+
+	// All tree nodes in one array; get_root() returns its first entry.
+	BRTreeNode *_tree_nodes;
+
+#if 0
+public:
+	// Must be called before drawing the bounding boxes; it copies the node
+	// data from the GPU to the CPU.
+	__host__
+		void pre_drawoutline();  //for test
+
+	__host__
+		void draw(BRTreeNode *root);
+
+	__host__
+		void access(BRTreeNode *root, vector<BRTreeNode*>& bad_bode);
+
+private:
+	// To be reworked later (translated from a partly unreadable note):
+	// copyFromGPUtoCPU creates the host buffer itself, which breaks the
+	// pairing between allocating and releasing memory.
+	unsigned int _num_internal_node;
+	unsigned int _num_leaf_node;
+
+	BRTreeNode *h_leaf_nodes;
+	BRTreeNode *h_internal_nodes;
+
+public:
+	__host__
+		BRTreeNode *get_leaf_nodes()
+	{
+		copyFromGPUtoCPU((void**)&h_leaf_nodes, _leaf_nodes, _num_leaf_node * sizeof(BRTreeNode));
+		return h_leaf_nodes;
+	}
+
+	__host__
+		BRTreeNode *get_internal_nodes()
+	{
+		copyFromGPUtoCPU((void**)&h_internal_nodes, _internal_nodes, _num_internal_node * sizeof(BRTreeNode));
+		return h_internal_nodes;
+	}
+
+	__host__
+		inline void printLeafNode()
+	{
+		for (int i = 0; i < _num_leaf_node; i++)
+		{
+			_leaf_nodes[i].printInfo();
+		}
+		return;
+	}
+
+	__host__
+		inline void printInternalNode()
+	{
+		for (int i = 0; i < _num_internal_node; i++)
+		{
+			_internal_nodes[i].printInfo();
+		}
+		return;
+	}
+#endif
+	friend class BVHBuilder;
+};
+
+// Disabled recursive variant of the point query (compiled out by `#if 0`).
+// It relies on helpers that are not defined in this header (check_overlap,
+// is_leaf, getIdx, three-argument get_left_child/get_right_child), and the
+// final else branch neither returns a value nor uses the results of the two
+// recursive calls, so it would need fixing before being re-enabled.
+#if 0
+// Allan Yu unused
+// Allan Yu move to bvh.cu
+//data stack overflow when recursively
+static __host__ __device__ bool recursive_intersect(BRTreeNode *_leaf_nodes, BRTreeNode *_internal_nodes, BRTreeNode *root, const glm::vec3 point, int &idx)
+{
+	bool overlap = check_overlap(point, root);
+	if (!overlap)
+		return false;
+	if (is_leaf(root))
+	{
+		idx = root->getIdx();
+		return true;
+	}
+	else
+	{
+		recursive_intersect(_leaf_nodes, _internal_nodes, get_left_child(_leaf_nodes, _internal_nodes, root), point, idx);
+		recursive_intersect(_leaf_nodes, _internal_nodes, get_right_child(_leaf_nodes, _internal_nodes, root), point, idx);
+	}
+}
+#endif
+
diff --git a/bvh/BVHBuilder.cu b/bvh/BVHBuilder.cu
new file mode 100644
index 0000000..4e54871
--- /dev/null
+++ b/bvh/BVHBuilder.cu
@@ -0,0 +1,535 @@
+#include <iostream>
+
+#include "../Utilities.h"
+#include "../Cloth.h"
+
+#include "BRTreeNode.h"
+#include "BVHAccel.h"
+#include "BVHBuilder.h"
+
+#define DEFAULT_THREAD_PER_BLOCK 1024
+
+using std::cout; 
+using std::endl; 
+
+// Spreads a 10-bit integer over 30 bits: after the four multiply-and-mask
+// rounds every source bit is followed by two zero bits.
+__device__ MortonCode d_expandBits(MortonCode v)
+{
+	MortonCode spread = v;
+
+	spread = (spread * 0x00010001u) & 0xFF0000FFu;
+	spread = (spread * 0x00000101u) & 0x0F00F00Fu;
+	spread = (spread * 0x00000011u) & 0xC30C30C3u;
+	spread = (spread * 0x00000005u) & 0x49249249u;
+
+	return spread;
+}
+
+// Builds a 30-bit Morton code from three coordinates that are expected to
+// lie in [0,1]: each one is scaled by 1024, clamped to [0, 1023], truncated
+// to an integer and bit-interleaved, with x in the highest position of every
+// 3-bit group and z in the lowest.
+__device__ MortonCode d_morton3D(float x, float y, float z)
+{
+	const float grid_x = min(max(x * 1024.0f, 0.0f), 1023.0f);
+	const float grid_y = min(max(y * 1024.0f, 0.0f), 1023.0f);
+	const float grid_z = min(max(z * 1024.0f, 0.0f), 1023.0f);
+
+	const MortonCode bits_x = d_expandBits((MortonCode)grid_x);
+	const MortonCode bits_y = d_expandBits((MortonCode)grid_y);
+	const MortonCode bits_z = d_expandBits((MortonCode)grid_z);
+
+	return bits_x * 4 + bits_y * 2 + bits_z;
+}
+
+// Calculates a 30-bit Morton code for the
+// given 3D point located within the unit cube [0,1].
+// Convenience overload that unpacks the vector.
+__device__ MortonCode d_morton3D(glm::vec3 p)
+{
+	const float px = p.x;
+	const float py = p.y;
+	const float pz = p.z;
+	return d_morton3D(px, py, pz);
+}
+
+// Allan Yu move to treebuilder.cu
+// One thread per primitive: stores in d_bbox[i] the box of the current
+// primitive i grown to also cover the box of the previous primitive i.
+__global__ void get_bboxes(const Primitive *primitives_lst, const Primitive *primitives_cur, unsigned int num, BBox *d_bbox)
+{
+	const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
+	if (tid < num)
+	{
+		Primitive now = primitives_cur[tid];
+		Primitive before = primitives_lst[tid];
+
+		BBox swept = now.get_bbox();
+		swept.expand(before.get_bbox());
+
+		d_bbox[tid] = swept;
+	}
+}
+
+// Allan Yu move to treebuilder.cu
+// One thread per primitive: stores in d_bbox[i] the box of primitive i grown
+// to also cover a copy of itself moved by -0.02 times the primitive normal.
+__global__ void get_bboxes(const Primitive *primitives, unsigned int num, BBox *d_bbox)
+{
+	const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
+	if (tid < num)
+	{
+		Primitive tri = primitives[tid];
+		glm::vec3 n = tri.get_normal();
+
+		BBox thick = tri.get_bbox();
+		thick.expand(thick.offset(n * -0.02f));
+
+		d_bbox[tid] = thick;
+	}
+}
+
+// Allan Yu move to treebuilder.cu
+// Reduces `m` boxes to one overall box, written to d_bb[0].
+//
+// \param num     number of partial results (threads that take part)
+// \param m       number of input boxes in _d_bbox
+// \param _d_bbox input boxes
+// \param d_bb    scratch/output with room for at least `num` boxes
+//
+// Thread t first merges the inputs t, t + num, t + 2 * num, ... into
+// d_bb[t]; thread 0 then merges the `num` partial boxes. The only barrier
+// used is __syncthreads(), which covers a single block, so the kernel has to
+// be launched with exactly one block.
+//
+// Fixes compared with the previous version:
+//  - the m % num left-over inputs sit at m - res + t for t < res; they used
+//    to be skipped by exactly those threads, while threads with t >= res
+//    read past the end of _d_bbox,
+//  - no thread returns before the barrier any more.
+__global__ void get_bbox(int num, unsigned int m, const BBox *_d_bbox, BBox *d_bb)
+{
+	const unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
+	const unsigned int lanes = (num > 0) ? (unsigned int)num : 0u;
+
+	if (index < lanes)
+	{
+		const unsigned int div = m / lanes; // full rounds every lane takes part in
+		const unsigned int res = m % lanes; // left-over inputs after those rounds
+
+		BBox partial;
+		for (unsigned int i = 0; i < div; i++)  //use shared to replace
+		{
+			partial.expand(_d_bbox[i * lanes + index]);
+		}
+		if (index < res)
+		{
+			partial.expand(_d_bbox[m - res + index]);
+		}
+		d_bb[index] = partial;
+	}
+
+	// every thread of the block reaches the barrier
+	__syncthreads();
+
+	if (index == 0 && lanes > 0)
+	{
+		BBox total;
+		for (unsigned int i = 0; i < lanes; i++)
+		{
+			total.expand(d_bb[i]);
+		}
+		d_bb[0] = total;
+	}
+}
+
+// Allan Yu move to treebuilder.cu
+// One thread per box: d_morton[i] becomes the Morton code of the centroid of
+// box i, after bb.normalized_pose_of() has mapped it relative to `bb`.
+__global__ void get_mortons(unsigned int num, BBox bb, const BBox *_d_bbox, MortonCode *d_morton)
+{
+	const unsigned int tid = blockIdx.x * blockDim.x + threadIdx.x;
+	if (tid < num)
+	{
+		d_morton[tid] = d_morton3D(bb.normalized_pose_of(_d_bbox[tid].centroid()));
+	}
+}
+
+// allanyu working here
+/**
+ * Length of the common bit prefix of the Morton codes at positions i and j
+ * (count of leading zero bits of their XOR).
+ * Returns -1 when j lies outside [0, length); i is not range-checked.
+ */
+__device__ int delta(int i, int j, MortonCode *sorted_morton_code, int length)
+{
+	const bool j_in_range = (j >= 0) && (j < length);
+	return j_in_range ? __clz(sorted_morton_code[i] ^ sorted_morton_code[j]) : -1;
+}
+
+/**
+ * Determines the range of sorted leaves covered by internal node i.
+ * \param sorted_morton_code sorted Morton codes, one per leaf
+ * \param num_leaf_nodes     number of entries in sorted_morton_code
+ * \param i                  index of the internal node
+ * \return the range as (x = smaller index, y = larger index); one of the
+ *         two ends is always i itself
+ */
+__device__ int2 determineRange(MortonCode *sorted_morton_code, int num_leaf_nodes, int i)
+{
+	// Direction of the range: towards the neighbour that shares the longer
+	// prefix with code i. delta() yields -1 for an out-of-range neighbour,
+	// so the first and the last node point inwards.
+	int d = delta(i, i + 1, sorted_morton_code, num_leaf_nodes) - delta(i, i - 1, sorted_morton_code, num_leaf_nodes);
+	d = d > 0 ? 1 : -1;
+
+	//compute the upper bound for the length of the range
+	// delta_min is the prefix shared with the neighbour on the other side;
+	// every member of the range must share strictly more than that with i.
+	int delta_min = delta(i, i - d, sorted_morton_code, num_leaf_nodes);
+	int lmax = 2;
+	while (delta(i, i + lmax * d, sorted_morton_code, num_leaf_nodes) > delta_min)
+	{
+		lmax = lmax * 2;
+	}
+
+	//find the other end using binary search
+	// l accumulates the largest step that still satisfies the prefix condition
+	int l = 0;
+	for (int t = lmax / 2; t >= 1; t /= 2)
+	{
+		if (delta(i, i + (l + t) * d, sorted_morton_code, num_leaf_nodes) > delta_min)
+		{
+			l = l + t;
+		}
+	}
+	int j = i + l * d;
+
+	// return the two ends in ascending order
+	int2 range;
+	if (i <= j) { range.x = i; range.y = j; }
+	else { range.x = j; range.y = i; }
+	return range;
+}
+
+/**
+ * Tells whether two values differ anywhere in their bits down to position
+ * n, where position 0 is bit 31: both values are shifted right by (31 - n)
+ * and what remains is compared.
+ */
+__device__ bool is_diff_at_bit(MortonCode val1, MortonCode val2, int n)
+{
+	const int shift = 31 - n;
+	const MortonCode head1 = val1 >> shift;
+	const MortonCode head2 = val2 >> shift;
+	return head1 != head2;
+}
+
+/**
+ * Finds the split position for the internal node covering the sorted leaves
+ * [start, last].
+ * \return the index of the last leaf that goes to the left child, or -1
+ *         when the range holds a single leaf (start == last)
+ */
+__device__ int findSplit(MortonCode *sorted_morton_code, int start, int last)
+{
+	//return -1 if there is only 
+	//one primitive under this node.
+	if (start == last)
+	{
+		return -1;
+	}
+	else
+	{
+		// number of leading bits the two ends of the range have in common
+		int common_prefix = __clz(sorted_morton_code[start] ^ sorted_morton_code[last]);
+
+		//handle duplicated morton code separately
+		// identical end codes (XOR == 0 gives 32): split in the middle
+		if (common_prefix == 32)
+		{
+			return (start + last) / 2;
+		}
+
+		// Use binary search to find where the next bit differs.
+		// Specifically, we are looking for the highest object that
+		// shares more than commonPrefix bits with the first one.
+
+		int split = start; // initial guess
+		int step = last - start;
+		do
+		{
+			step = (step + 1) >> 1; // exponential decrease
+			int newSplit = split + step; // proposed new position
+
+			if (newSplit < last)
+			{
+				// the proposal is kept while its code still agrees with the
+				// first code in every bit down to position common_prefix
+				bool is_diff = is_diff_at_bit(sorted_morton_code[start],
+					sorted_morton_code[newSplit],
+					common_prefix);
+				if (!is_diff)
+				{
+					split = newSplit; // accept proposal
+				}
+			}
+		} while (step > 1);
+
+		return split;
+	}
+}
+
+//FOR BR-TREE CONSTRUCTION
+//TODO: implement internal node processing routine
+//TODO: handle duplicated morton codes as special case (using their position i,j as fallback)
+
+//FOR BVH CONSTRUCTION
+//TODO: implement AABB construction process by go back from the tree node to the root
+//TODO: convert BR-TREE BACK TO BVH
+//TODO: debug
+
+// One thread per internal node: works out which leaves the node covers,
+// where that range is split, and links the node with its two children.
+// `internalNodes` is the combined node array: a child that is a leaf lives
+// `num_internal_nodes` entries further back than an internal child with the
+// same number.
+__global__  void processInternalNode(
+	int num_internal_nodes,
+	MortonCode *sorted_morton_code,
+	BRTreeNode *internalNodes)
+{
+	const int node_id = blockIdx.x * blockDim.x + threadIdx.x;
+	if (node_id >= num_internal_nodes) return;
+
+	// Range of leaves below this node (there is one leaf more than internal nodes).
+	const int2 span = determineRange(sorted_morton_code, num_internal_nodes + 1, node_id);
+
+	// Where to cut the range; -1 means nothing to split.
+	const int split = findSplit(sorted_morton_code, span.x, span.y);
+	if (split == -1) return;
+
+	// childA: a leaf when the left part consists of the first element only.
+	const int idx_lft = (split == span.x) ? (split + num_internal_nodes) : split;
+
+	// childB: a leaf when the right part consists of the last element only.
+	const int idx_rht = (split + 1 == span.y) ? (split + 1 + num_internal_nodes) : (split + 1);
+
+	// Record the parent-child relationships in both directions.
+	BRTreeNode &self = internalNodes[node_id];
+	self.set_lft(idx_lft);
+	self.set_rht(idx_rht);
+	internalNodes[idx_lft].set_parent(node_id);
+	internalNodes[idx_rht].set_parent(node_id);
+}
+
+/**
+ * Constructs the bounding boxes from the leaves up to the root.
+ *
+ * One thread per leaf. The thread writes the box and primitive index of its
+ * leaf and then climbs towards the root. At every internal node an arrival
+ * counter decides who continues: the first thread to arrive stops, the
+ * second one (whose arrival proves that both children are finished) merges
+ * the children's boxes and moves on.
+ *
+ * \param num_internal_nodes number of internal nodes (leaves = this + 1)
+ * \param counters           one counter per internal node, zero on entry
+ * \param bboxes             one box per leaf
+ * \param internalNodes      combined node array, leaves stored behind the
+ *                           internal nodes
+ *
+ * atomicInc(address, 1) wraps back to 0 on the second arrival, so the
+ * counters end up zero again.
+ */
+__global__  void calculateBoudingBox(
+	int num_internal_nodes,
+	unsigned int *counters,
+	const BBox *bboxes,
+	BRTreeNode *internalNodes)
+{
+	int idx = blockIdx.x * blockDim.x + threadIdx.x;
+	if (idx >= num_internal_nodes + 1)
+	{
+		return;
+	}
+
+	// handle leaf first
+	BRTreeNode *node = internalNodes + num_internal_nodes + idx;
+	node->set_bbox(bboxes[idx]);
+	node->set_idx(idx);
+
+	// Climb until the root has been processed. A negative parent marks the
+	// root; this is checked BEFORE the parent is used, so a tree consisting
+	// of a single leaf no longer indexes counters[-1].
+	int idx_parent = node->parent();
+	while (idx_parent >= 0)
+	{
+		// Make the box written above visible to other threads before
+		// announcing the arrival at the parent.
+		__threadfence();
+
+		// terminate the first accessing thread
+		if (atomicInc(counters + idx_parent, 1) == 0)
+		{
+			return;
+		}
+
+		// calculate bounding box by merging two children's bounding box
+		node = internalNodes + idx_parent;
+
+		int idx_lft = node->lft();
+		node->expand(internalNodes[idx_lft].bbox());
+
+		int idx_rht = node->rht();
+		node->expand(internalNodes[idx_rht].bbox());
+
+		idx_parent = node->parent();
+	}
+}
+
+/////////////////////////////////////////////////
+/**
+ * Creates an empty builder: no capacity and no device buffers.
+ * All four device pointers start as NULL because ref_auto_clean() hands
+ * every one of them to cudaFree(); _d_bbox and _d_counters used to be left
+ * uninitialized, so destroying a builder that never reached
+ * prepare_memory() freed garbage pointers.
+ * The buffers themselves are allocated on demand by prepare_memory().
+ */
+BVHBuilder::BVHBuilder() :
+	_size(0),
+	_d_bboxes(NULL),
+	_d_bbox(NULL),
+	_d_morton_codes(NULL),
+	_d_counters(NULL)
+{
+}
+
+// Releases the four device-side scratch buffers of the builder, in the same
+// order as before. The pointers themselves are left unchanged.
+void BVHBuilder::ref_auto_clean()
+{
+	void *scratch[] = { _d_bboxes, _d_morton_codes, _d_bbox, _d_counters };
+	const unsigned int scratch_count = sizeof(scratch) / sizeof(scratch[0]);
+
+	for (unsigned int s = 0; s < scratch_count; ++s)
+	{
+		cudaFree(scratch[s]);
+	}
+}
+
+// Grows the host vectors and device buffers so that they can hold `size`
+// primitives. Nothing happens when the current capacity is already large
+// enough. ref_renew() comes from RefObject (not visible here) - presumably
+// it releases the previous device buffers; TODO confirm.
+void BVHBuilder::prepare_memory(unsigned int size)
+{
+	const bool large_enough = (size <= _size);
+	if (large_enough)
+	{
+		return;
+	}
+
+	ref_renew();
+	_size = size;
+
+	// host side: one entry per primitive, 2 * size - 1 tree nodes
+	const unsigned int node_count = size * 2 - 1;
+	_tree_nodes.resize(node_count);
+	_bboxes.resize(size);
+	_morton_codes.resize(size);
+	_sorted_bboxes.resize(size);
+	_sorted_morton_codes.resize(size);
+	_sorted_primitives_lst.resize(size);
+	_sorted_primitives_cur.resize(size);
+
+	// device side: _d_bbox always gets room for 128 boxes, the others one
+	// entry per primitive
+	const size_t reduce_bytes = sizeof(BBox) * 128;
+	const size_t morton_bytes = size * sizeof(MortonCode);
+	const size_t bboxes_bytes = size * sizeof(BBox);
+	const size_t counter_bytes = size * sizeof(unsigned int);
+
+	safe_cuda(cudaMalloc(&_d_bbox, reduce_bytes));
+	safe_cuda(cudaMalloc(&_d_morton_codes, morton_bytes));
+	safe_cuda(cudaMalloc(&_d_bboxes, bboxes_bytes));
+	safe_cuda(cudaMalloc(&_d_counters, counter_bytes));
+}
+
+// void BVHBuilder::build_impl(bool ccd, BVHAccel &tree, const Primitives &primitives_last, const Primitives &primitives)
+// Builds the hierarchy for `body` into `tree`.
+// With ccd == true the vertex snapshot already stored in the tree becomes
+// the "last" snapshot before the new positions are uploaded; otherwise only
+// the current snapshot is replaced. Prints "return" and does nothing for a
+// mesh without vertices or faces.
+// NOTE(review): the cached primitives are only rebuilt when the face count
+// changes, and they keep the device vertex pointers of the tree they were
+// built for - confirm this is safe when one builder serves several trees.
+void BVHBuilder::build_impl(bool ccd, BVHAccel &tree, const Mesh &body)
+{
+	const bool nothing_to_build = body.vertices.empty() || body.faces.empty();
+	if (nothing_to_build)
+	{
+		cout << "return" << endl;
+		return;
+	}
+
+	unsigned int size = body.faces.size();
+	const unsigned int num_vertices = body.vertices.size();
+
+	tree.resize(num_vertices, size);
+
+	Vec3s obj(num_vertices);
+	body.get_euler_coordinates(obj);
+
+	const bool primitives_outdated = (_body_lst.size() != body.faces.size());
+	if (primitives_outdated)
+	{
+		compute_primitives(body, _body_lst, tree._vertices_lst);
+		compute_primitives(body, _body_cur, tree._vertices_cur);
+	}
+
+	if (ccd)
+	{
+		// keep the previous positions as the "last" snapshot
+		safe_cuda(cudaMemcpy(tree._vertices_lst, tree._vertices_cur, sizeof(glm::vec3) * num_vertices, cudaMemcpyDeviceToDevice));
+	}
+	// in both modes the new positions become the current snapshot
+	safe_cuda(cudaMemcpy(tree._vertices_cur, &obj[0], sizeof(glm::vec3) * obj.size(), cudaMemcpyHostToDevice));
+
+	// make sure the builder's scratch buffers are large enough
+	prepare_memory(size);
+
+	// compute the bbox of every primitive in one pass
+	compute_bboxes(tree, ccd, size, _body_lst, _body_cur);
+
+	// get the overall bb
+	// calculate root AABB size
+	BBox bbox;
+	compute_bbox(size, 128, bbox);
+
+	// get the morton codes
+	compute_morton_codes(size, bbox);
+
+	// sort and remove duplicates (may lower `size`)
+	compute_sorted_and_unique_codes(tree, ccd, size, _body_lst, _body_cur);
+
+	// cout << "start building parallel brtree" << endl;
+	// delegate the binary radix tree construction process to GPU
+	compute_tree(tree, size);
+}
+
+
+// Fills h_primitives with one Primitive per face of `body`. Every primitive
+// is constructed from the vertex array pointer `d_obj_vertices` and the
+// three vertex indices of its face.
+void BVHBuilder::compute_primitives(const Mesh &body, Primitives &h_primitives, glm::vec3 *d_obj_vertices)
+{
+	h_primitives.resize(body.faces.size());
+
+	const size_t face_count = h_primitives.size();
+	for (size_t f = 0; f < face_count; ++f)
+	{
+		h_primitives[f] = Primitive(d_obj_vertices, body.faces[f].v0, body.faces[f].v1, body.faces[f].v2);
+	}
+}
+
+// Uploads the primitives to the tree and computes one bounding box per
+// primitive on the GPU; the boxes end up in _d_bboxes and, copied back, in
+// _bboxes.
+// With ccd == true each box covers the primitive in both the last and the
+// current snapshot; otherwise only the current snapshot is used.
+// Changes compared with the previous version: the copy back to the host is
+// now checked with safe_cuda like every other copy in this file, and
+// size == 0 returns early instead of computing `size % 0`.
+void BVHBuilder::compute_bboxes(
+	BVHAccel &tree,
+	bool ccd,
+	unsigned int size,
+	const Primitives &primitives_last,
+	const Primitives &primitives)
+{
+	if (size == 0)
+	{
+		return;
+	}
+
+	unsigned int block_size = 512;
+	unsigned int num_threads = min(block_size, size);
+	unsigned int num_blocks = (size % num_threads != 0) ? (size / num_threads + 1) : (size / num_threads);
+
+	// initialize the device-side primitive arrays
+	safe_cuda(cudaMemcpy(tree._primitives_cur, &primitives[0], sizeof(Primitive) * size, cudaMemcpyHostToDevice));
+
+	if (ccd)
+	{
+		safe_cuda(cudaMemcpy(tree._primitives_lst, &primitives_last[0], sizeof(Primitive) * size, cudaMemcpyHostToDevice));
+		get_bboxes << <num_blocks, num_threads >> > (tree._primitives_lst, tree._primitives_cur, size, _d_bboxes);
+	}
+	else
+	{
+		get_bboxes << <num_blocks, num_threads >> > (tree._primitives_cur, size, _d_bboxes);
+	}
+
+	// a failure of this copy is reported instead of being silently ignored
+	safe_cuda(cudaMemcpy(&_bboxes[0], _d_bboxes, sizeof(BBox) * size, cudaMemcpyDeviceToHost));
+}
+
+// Reduces the `size` per-primitive boxes in _d_bboxes to one box and copies
+// it into `bbox`. The reduction runs as a single block of `num_threads`
+// threads and uses _d_bbox as its scratch and output buffer.
+void BVHBuilder::compute_bbox(
+	unsigned int size,
+	unsigned int num_threads,
+	BBox &bbox)
+{
+#if 0 // cpu
+	BBox bb;
+	for (unsigned int i = 0; i < size; ++i) {
+		bbox.expand(_bboxes[i]);
+	}
+#endif
+
+	const dim3 grid(1);
+	const dim3 block(num_threads);
+	get_bbox << <grid, block >> > (num_threads, size, _d_bboxes, _d_bbox);
+
+	// the first entry of the scratch buffer holds the overall box
+	safe_cuda(cudaMemcpy(&bbox, _d_bbox, sizeof(BBox), cudaMemcpyDeviceToHost));
+}
+
+// Computes one Morton code per box in _d_bboxes, relative to the overall
+// box `bbox`, into _d_morton_codes and copies the codes back to
+// _morton_codes.
+// Changes compared with the previous version: the copy back to the host is
+// now checked with safe_cuda, and size == 0 returns early instead of
+// computing `size % 0`.
+void BVHBuilder::compute_morton_codes(unsigned int size, const BBox &bbox)
+{
+	if (size == 0)
+	{
+		return;
+	}
+
+	unsigned int block_size = 512;
+	unsigned int num_threads = min(block_size, size);
+	unsigned int num_blocks = (size % num_threads != 0) ? (size / num_threads + 1) : (size / num_threads);
+
+	get_mortons << <num_blocks, num_threads >> > (size, bbox, _d_bboxes, _d_morton_codes);
+	safe_cuda(cudaMemcpy(&_morton_codes[0], _d_morton_codes, sizeof(MortonCode) * size, cudaMemcpyDeviceToHost));
+}
+
+// Orders the primitives by Morton code on the host, drops entries with
+// redundant codes, and uploads the filtered primitives, boxes and codes
+// again. `size` is updated to the number of entries that remain.
+// indices_sort(), remove_redundant() and filter() are helpers defined
+// elsewhere in the project.
+void BVHBuilder::compute_sorted_and_unique_codes(BVHAccel &tree, bool ccd, unsigned int &size, const Primitives &primitives_last, const Primitives &primitives)
+{
+	// sort
+	// thrust::sort(thrust::host, primitives.begin(),primitives.end());
+	// cpu is faster than gpu, are u kidding me?
+	vector<unsigned int> order(size);
+	indices_sort(_morton_codes, order);
+
+	// remove duplicates
+	remove_redundant(_morton_codes, order);
+	size = order.size();
+
+	if (ccd)
+	{
+		// the "last" snapshot is only maintained in ccd mode
+		filter(primitives_last, order, _sorted_primitives_lst);
+		safe_cuda(cudaMemcpy(tree._primitives_lst, &_sorted_primitives_lst[0], sizeof(Primitive) * size, cudaMemcpyHostToDevice));
+	}
+
+	// reorder the host copies ...
+	filter(primitives, order, _sorted_primitives_cur);
+	filter(_bboxes, order, _sorted_bboxes);
+	filter(_morton_codes, order, _sorted_morton_codes);
+
+	// ... and push them back to the device
+	const size_t primitive_bytes = sizeof(Primitive) * size;
+	const size_t bbox_bytes = sizeof(BBox) * size;
+	const size_t morton_bytes = sizeof(MortonCode) * size;
+	safe_cuda(cudaMemcpy(tree._primitives_cur, &_sorted_primitives_cur[0], primitive_bytes, cudaMemcpyHostToDevice));
+	safe_cuda(cudaMemcpy(_d_bboxes, &_sorted_bboxes[0], bbox_bytes, cudaMemcpyHostToDevice));
+	safe_cuda(cudaMemcpy(_d_morton_codes, &_sorted_morton_codes[0], morton_bytes, cudaMemcpyHostToDevice));
+}
+
+// Builds the tree for `size` sorted leaves on the GPU: resets the device
+// nodes from the host-side _tree_nodes, clears the arrival counters, links
+// the internal nodes and finally propagates the bounding boxes upwards.
+// Changes compared with the previous version:
+//  - size == 0 returns early (size - 1 would wrap around),
+//  - with a single leaf there are no internal nodes, and the linking kernel
+//    is skipped instead of being launched with a grid of zero blocks.
+void BVHBuilder::compute_tree(BVHAccel &tree, unsigned int size)
+{
+	if (size == 0)
+	{
+		return;
+	}
+
+	unsigned int num_internal_nodes = size - 1;
+	unsigned int num_leaf_nodes = size;
+
+	safe_cuda(cudaMemcpy(tree._tree_nodes, &_tree_nodes[0], sizeof(BRTreeNode) * (num_leaf_nodes + num_internal_nodes), cudaMemcpyHostToDevice));
+	safe_cuda(cudaMemset(_d_counters, 0, sizeof(unsigned int) * num_internal_nodes));
+
+	unsigned int numBlock, threadPerBlock = DEFAULT_THREAD_PER_BLOCK;
+
+	//////////////////////////////////////////////////////////////////
+	// build the bvh
+	if (num_internal_nodes > 0)
+	{
+		numBlock = (num_internal_nodes + DEFAULT_THREAD_PER_BLOCK - 1) / threadPerBlock;
+		processInternalNode << <numBlock, threadPerBlock >> >(num_internal_nodes, _d_morton_codes, tree._tree_nodes);
+	}
+
+	//fix << <1, 1 >> > (d_leaf_nodes, d_internal_nodes);
+
+	//calculate bounding box
+	numBlock = (num_leaf_nodes + DEFAULT_THREAD_PER_BLOCK - 1) / threadPerBlock;
+	calculateBoudingBox << <numBlock, threadPerBlock >> >(num_internal_nodes, _d_counters, _d_bboxes, tree._tree_nodes);
+}
\ No newline at end of file
diff --git a/bvh/BVHBuilder.h b/bvh/BVHBuilder.h
new file mode 100644
index 0000000..eefdd12
--- /dev/null
+++ b/bvh/BVHBuilder.h
@@ -0,0 +1,76 @@
+#pragma once
+
+#include <vector>
+
+#include <cuda_runtime.h>
+
+#include "BBox.h"
+#include "Primitive.h"
+#include "BRTreeNode.h"
+
+using std::vector; 
+
+class Mesh;
+class BVHAccel; 
+
+typedef uint32 MortonCode; 
+typedef std::vector<MortonCode> MortonCodes;
+
+/**
+ * Builds the binary radix tree on the GPU.
+ *
+ * build_bvh() and build_ccd() are the two entry points; both forward to
+ * build_impl() and differ only in the `ccd` flag they pass.
+ * The builder keeps its scratch storage between calls (see the members
+ * below) and releases the device part in ref_auto_clean().
+ */
+class BVHBuilder : public RefObject
+{
+public:
+	BVHBuilder();
+	~BVHBuilder() { ref_release(); }
+
+	// Builds `tree` for `body` with ccd == false.
+	void build_bvh(BVHAccel &tree, const Mesh &body)
+	{
+		build_impl(false, tree, body);
+	}
+	// Builds `tree` for `body` with ccd == true.
+	void build_ccd(BVHAccel &tree, const Mesh &body)
+	{
+		build_impl(true, tree, body);
+	}
+
+private:
+	// Frees the device buffers declared at the end of this class.
+	virtual void ref_auto_clean();
+
+	// Grows the scratch storage when `size` exceeds the current capacity.
+	void prepare_memory(unsigned int size);
+
+	// Shared implementation behind build_bvh() and build_ccd().
+	void build_impl(bool ccd, BVHAccel &tree, const Mesh &body);
+
+	// The individual build stages, declared in the order build_impl() calls them.
+	void compute_primitives(const Mesh &body, Primitives &h_primitives, glm::vec3 *d_obj_vertices);
+	void compute_bboxes(BVHAccel &tree, bool ccd, unsigned int size, const Primitives &primitives_last, const Primitives &primitives);
+	void compute_bbox(unsigned int size, unsigned int num_threads, BBox &bbox);
+	void compute_morton_codes(unsigned int size, const BBox &bbox);
+	void compute_sorted_and_unique_codes(BVHAccel &tree, bool ccd, unsigned int &size, const Primitives &primitives_last, const Primitives &primitives);
+	void compute_tree(BVHAccel &tree, unsigned int size);
+
+private:
+	// Capacity (in primitives) the storage below was last sized for; used to
+	// make sure there is enough room when the builder is called repeatedly.
+	unsigned int _size;
+	
+	// (Translated from a mis-encoded comment; wording approximate.)
+	// The members below are not part of the resulting bvh; they are temporary
+	// data produced while the tree is being built. They are kept as members
+	// so that a builder can be reused without allocating the storage again
+	// on every build, which also keeps the parameter lists of the stages short.
+	Primitives _body_lst;
+	Primitives _body_cur;
+
+	BRTreeNodes _tree_nodes;
+
+	BBoxes _bboxes;
+	BBoxes _sorted_bboxes;
+
+	MortonCodes _morton_codes;
+	MortonCodes _sorted_morton_codes;
+
+	Primitives _sorted_primitives_lst;
+	Primitives _sorted_primitives_cur;
+
+	// Device buffers, allocated in prepare_memory().
+	BBox *_d_bboxes;             // one box per primitive
+	BBox *_d_bbox;               // 128 boxes, scratch/output of the box reduction
+	MortonCode *_d_morton_codes; // one code per primitive
+	unsigned int *_d_counters;   // one counter per primitive
+};
diff --git a/bvh/Bbox.cpp b/bvh/Bbox.cpp
new file mode 100644
index 0000000..ae6c578
--- /dev/null
+++ b/bvh/Bbox.cpp
@@ -0,0 +1,47 @@
+#include <iostream>
+
+#include <GL/glew.h>
+
+#include "BBox.h"
+
+using namespace std;
+
+// Draws the twelve edges of the box with immediate-mode OpenGL: the top
+// ring, the bottom ring and the four vertical edges, in that order.
+void BBox::draw() const 
+{
+	const double lo_y = _min.y;
+	const double hi_y = _max.y;
+
+	// The four corners of the box footprint in the XZ plane, in the order in
+	// which the top ring visits them.
+	const double ring_x[4] = { _max.x, _max.x, _min.x, _min.x };
+	const double ring_z[4] = { _max.z, _min.z, _min.z, _max.z };
+
+	// top: closed loop that starts and ends at corner 0
+	glBegin(GL_LINE_STRIP);
+	for (int c = 0; c <= 4; ++c)
+	{
+		glVertex3d(ring_x[c % 4], hi_y, ring_z[c % 4]);
+	}
+	glEnd();
+
+	// bottom: closed loop that starts and ends at corner 2 (the min corner)
+	glBegin(GL_LINE_STRIP);
+	for (int c = 2; c <= 6; ++c)
+	{
+		glVertex3d(ring_x[c % 4], lo_y, ring_z[c % 4]);
+	}
+	glEnd();
+
+	// side: one vertical segment per corner, from the top ring down to the bottom ring
+	glBegin(GL_LINES);
+	for (int c = 0; c < 4; ++c)
+	{
+		glVertex3d(ring_x[c], hi_y, ring_z[c]);
+		glVertex3d(ring_x[c], lo_y, ring_z[c]);
+	}
+	glEnd();
+
+}
+
+// Writes the min corner, a semicolon and the max corner to standard output
+// on a single line, then flushes.
+void BBox::print() const
+{
+	const glm::vec3 &lo = _min;
+	const glm::vec3 &hi = _max;
+
+	cout << lo.x << "  " << lo.y << " " << lo.z << ";  "
+		<< hi.x << "  " << hi.y << " " << hi.z << endl;
+}
\ No newline at end of file
diff --git a/bvh/Bbox.h b/bvh/Bbox.h
new file mode 100644
index 0000000..ed593cc
--- /dev/null
+++ b/bvh/Bbox.h
@@ -0,0 +1,131 @@
+#pragma once
+#include <vector>
+
+#include <cuda_runtime.h>
+#include <glm/glm.hpp>
+
+#include "../Common.h"
+
+#define INF_D 100.0
+
+/**
+ * Axis-aligned bounding box stored as min corner, max corner and their
+ * difference (_extent). Usable from both host and device code.
+ */
+class BBox
+{
+public:
+	/**
+	* Constructor.
+	* The default constructor creates a new bounding box which contains no
+	* points: _min is +INF_D and _max is -INF_D on every axis, so the box is
+	* inverted until it is expanded.
+	*/
+	__host__ __device__ BBox() : _max(-INF_D), _min(INF_D), _extent(-2 * INF_D) { }
+
+	/**
+	 *Constructor.
+	 *Creates a bounding box that includes a single point.
+	 *The extent of such a box is zero on every axis.
+	*/
+	__host__ __device__ BBox(const glm::vec3 &p) : _max(p), _min(p), _extent(0.0f) { }
+
+	/**
+	 *Constructor.
+	 *Creates a bounding box with given bounds.
+	 *\param min the _min corner
+	 *\param max the _max corner
+	*/
+	__host__ __device__ BBox(const glm::vec3 &min, const glm::vec3 &max) : _max(max), _min(min)
+	{
+		_extent = _max - _min;
+	}
+
+	/**
+	 *Constructor.
+	 *Creates a bounding box with given bounds (component wise).
+	 *(x1, y1, z1) is the _min corner, (x2, y2, z2) the _max corner.
+	*/
+	__host__ __device__ BBox(
+		const double x1, const double y1, const double z1, 
+		const double x2, const double y2, const double z2) : 
+		_max(x2, y2, z2), _min(x1, y1, z1)
+	{
+		_extent = _max - _min;
+	}
+
+	/**
+	 *Expand the bounding box to include another (union).
+	 *If the given bounding box is contained within *this*, nothing happens.
+	 *Otherwise *this* is expanded to the minimum volume that contains the
+	 *given input.
+	 *\param rhs the bounding box to be included
+	*/
+	__host__ __device__ void expand(const BBox &rhs) 
+	{
+		_min = glm::min(_min, rhs._min);
+		_max = glm::max(_max, rhs._max);
+		_extent = _max - _min;
+	}
+
+	/**
+	 *Point-in-box test (boundaries inclusive).
+	 *Each axis is checked separately; the point is inside when
+	 *
+	 *  (Px >= BminX && Px <= BmaxX) &&
+	 *  (Py >= BminY && Py <= BmaxY) &&
+	 *  (Pz >= BminZ && Pz <= BmaxZ)
+	 *
+	 *\param p the point to test
+	 *\return true if p lies inside or on the surface of the box
+	*/
+	__host__ __device__ bool intersect(const glm::vec3 &p) const
+	{
+		return (p.x >= _min.x && p.x <= _max.x) && 
+			(p.y >= _min.y && p.y <= _max.y) &&
+			(p.z >= _min.z && p.z <= _max.z);
+	}
+
+	/**
+	 *\return the midpoint between the _min and _max corners
+	*/
+	__host__ __device__ glm::vec3 centroid() const {
+		glm::vec3 sum = _min + _max;
+		sum /= 2;
+		return sum;
+	}
+
+	/**
+	 *\param p translation to apply
+	 *\return a copy of this box with both corners shifted by p
+	*/
+	__host__ __device__ BBox offset(const glm::vec3 &p) const
+	{
+		BBox box = *this; 
+		box._max += p; 
+		box._min += p;
+
+		return box; 
+	}
+
+	/**
+	*Calculate and return a position relative to this BBox, scaled so that
+	*_min maps to (0,0,0) and _max maps to (1,1,1).
+	*
+	*NOTE: no clamping is performed; a position outside the box yields
+	*components outside [0,1]. If the box has zero extent on any axis the
+	*zero vector is returned to avoid a division by zero.
+	*
+	*\param p the position to be evaluated
+	*\return (p - _min) / _extent, or the zero vector for a degenerate box
+	*/
+	__host__ __device__ glm::vec3 normalized_pose_of(glm::vec3 p) const
+	{
+		if (_extent.x == 0 || _extent.y == 0 || _extent.z == 0)
+		{
+			// explicit zero: a default-constructed glm::vec3 is not
+			// guaranteed to be zero-initialized in every GLM version
+			return glm::vec3(0.0f);
+		}
+		glm::vec3 o2pos = p - _min;
+		o2pos /= _extent;
+		return o2pos;
+	}
+
+	/**
+	 *Draw box wireframe with OpenGL.
+	*/
+	void draw() const;
+
+	/**
+	 *Print both corners to stdout.
+	*/
+	void print() const;
+
+	
+private:
+	glm::vec3 _max;		///< _max corner of the bounding box
+	glm::vec3 _min;	    ///< _min corner of the bounding box
+	glm::vec3 _extent;	///< _extent of the bounding box (_min -> _max)
+
+};
+
+typedef std::vector<BBox> BBoxes;
\ No newline at end of file
diff --git a/bvh/Primitive.h b/bvh/Primitive.h
new file mode 100644
index 0000000..5f374ce
--- /dev/null
+++ b/bvh/Primitive.h
@@ -0,0 +1,103 @@
+#pragma once
+#include <vector>
+
+#include <glm/glm.hpp>
+
+#include "../Common.h"
+#include "BBox.h"
+
+// here primitive refer to triangle
+/**
+ * A triangle described by three indices into an externally owned vertex
+ * array. The class stores only the pointer; it never allocates or frees it.
+ */
+class Primitive
+{
+public:
+	/** Creates a primitive with no vertex array attached and all indices zero. */
+	Primitive() : _vertices(NULL), _v0(0), _v1(0), _v2(0) { }
+
+	/**
+	 *\param vertices pointer to the vertex array the indices refer to
+	 *\param v0 index of the first corner
+	 *\param v1 index of the second corner
+	 *\param v2 index of the third corner
+	*/
+	Primitive(
+		const glm::vec3 *vertices, 
+		const size_t v0, 
+		const size_t v1, 
+		const size_t v2
+	) : _vertices(vertices), _v0(v0), _v1(v1), _v2(v2) { }
+	
+	/** \return the first corner of the triangle */
+	__host__ __device__ const glm::vec3 &v0() const
+	{
+		return _vertices[_v0]; 
+	}
+
+	/** \return the second corner of the triangle */
+	__host__ __device__ const glm::vec3 &v1() const
+	{
+		return _vertices[_v1];
+	}
+
+	/** \return the third corner of the triangle */
+	__host__ __device__ const glm::vec3 &v2() const
+	{
+		return _vertices[_v2];
+	}
+
+	/**
+	 *Get the bounding box of the primitive.
+	 *\return the smallest axis-aligned box containing the three corners
+	*/
+	__host__ __device__ BBox get_bbox() const
+	{
+		BBox bbox(v0());
+		bbox.expand(BBox(v1()));
+		bbox.expand(BBox(v2()));
+		return bbox;
+	}
+
+	/**
+	 *Signed distance from a point to the plane of the triangle.
+	 *\param point the query point
+	 *\param normal receives the unit face normal (see get_normal())
+	 *\return dot(point - v0, normal); positive on the side the normal points to
+	*/
+	__host__ __device__ float distance_to(const glm::vec3 &point, glm::vec3 &normal) const
+	{
+		normal = get_normal();
+		
+		glm::vec3 tem = point - _vertices[_v0];
+		return glm::dot(tem, normal);
+	}
+
+	/**
+	 *Signed distance from a point to the plane of the triangle.
+	 *\param point the query point
+	 *\return dot(point - v0, normal); positive on the side the normal points to
+	*/
+	__host__ __device__ float distance_to(const glm::vec3 &point) const
+	{
+		glm::vec3 normal = get_normal();
+
+		glm::vec3 tem = point - _vertices[_v0];
+		return glm::dot(tem, normal);
+	}
+
+	/**
+	 *Unsigned distance from a point to the plane of the triangle.
+	 *\param point the query point
+	 *\return absolute value of distance_to(point)
+	*/
+	__host__ __device__ float udistance_to(const glm::vec3 &point) const
+	{
+		// must delegate to distance_to(); calling udistance_to() here would
+		// recurse forever
+		return fabs(distance_to(point)); 
+	}
+
+	/**
+	 *\return normalized cross((v1 - v0), (v2 - v0)). The result is not
+	 *guarded against a degenerate (zero-area) triangle.
+	*/
+	__host__ __device__ glm::vec3 get_normal() const
+	{
+		glm::vec3 v = v0(); 
+		glm::vec3 side1 = v1() - v;
+		glm::vec3 side2 = v2() - v;
+		glm::vec3 normalface = glm::cross(side1, side2);
+		return glm::normalize(normalface);
+	}
+
+	/** \return the average of the three corners */
+	__host__ __device__ glm::vec3 get_center() const
+	{
+		glm::vec3 center = v0();
+		center += v1();
+		center += v2();
+		center /= 3.0f;
+		return center;
+	}
+
+private:
+	const glm::vec3 *_vertices;  //for device, ptr to _vertices
+	size_t _v0;
+	size_t _v1;
+	size_t _v2;
+};
+
+typedef std::vector<Primitive> Primitives;
\ No newline at end of file
diff --git a/detectBody.h b/detectBody.h
new file mode 100644
index 0000000..a119f97
--- /dev/null
+++ b/detectBody.h
@@ -0,0 +1,127 @@
+#pragma once
+#include <Kinect.h>  
+#include<iostream>  
+#include<time.h>  
+#include<fstream>
+#include"generateBody.h"
+#include"KinectJointFilter.h"
+
+#define Joint_count 24
+#pragma comment(lib, "kinect20.lib") 
+using namespace std;
+
+// Calls Release() on a non-null interface pointer and then nulls the
+// caller's pointer; does nothing when the pointer is already NULL.
+template<class Interface>
+inline void SafeRelease(Interface *& pInterfaceToRelease)
+{
+	if (pInterfaceToRelease == NULL) {
+		return;
+	}
+	pInterfaceToRelease->Release();
+	pInterfaceToRelease = NULL;
+}
+mat JointTransform(Joint *prejoint);
+
+// Grabs the latest body frame from the reader and, for the first tracked
+// body whose joints can be read, stores the remapped joint matrix in
+// trans_joint.
+//
+// hResult     : passed by value and used only as a local scratch variable.
+// pBodyReader : reader to pull the frame from; must not be null.
+// trans_joint : receives JointTransform(joints) on success, untouched otherwise.
+// Returns true when a tracked body was found and converted, false otherwise.
+//
+// Every COM object acquired here (the frame and each IBody) is released
+// before returning, on the success path as well as on failure.
+bool detectJoint(HRESULT hResult, IBodyFrameReader *pBodyReader,mat &trans_joint)
+{
+	bool found = false;
+	IBodyFrame *pBodyFrame = nullptr;
+	hResult = pBodyReader->AcquireLatestFrame(&pBodyFrame);
+	if (SUCCEEDED(hResult)) {
+		IBody *pBody[BODY_COUNT] = { 0 };
+
+		hResult = pBodyFrame->GetAndRefreshBodyData(BODY_COUNT, pBody);
+		if (SUCCEEDED(hResult)) {
+			for (int count = 0; count < BODY_COUNT && !found; count++) {
+				// a slot may be left empty; never dereference a null body
+				if (pBody[count] == nullptr) {
+					continue;
+				}
+				BOOLEAN bTracked = false;
+				hResult = pBody[count]->get_IsTracked(&bTracked);
+				if (SUCCEEDED(hResult) && bTracked) {
+					Joint joint[JointType::JointType_Count];
+					hResult = pBody[count]->GetJoints(JointType::JointType_Count, joint);//joint  
+					// only use the joints when they were actually filled in
+					if (SUCCEEDED(hResult)) {
+						trans_joint = JointTransform(joint);
+
+						cout <<  " OK" << endl;
+						found = true;
+					}
+				}
+			}
+		}
+		for (int count = 0; count < BODY_COUNT; count++) {
+			SafeRelease(pBody[count]);
+		}
+	}
+	SafeRelease(pBodyFrame);
+	return found;
+}
+
+
+//Joint transform
+
+// Copies the X/Y/Z position of selected Kinect joints into a 24x3 matrix.
+// Each table entry is {destination row, source index into prejoint}; rows
+// that do not appear in the table (3, 9-11, 13, 14, 22, 23) stay zero.
+mat JointTransform(Joint *prejoint)
+{
+	const int row_from_joint[16][2] = {
+		{ 0, 0 },  { 1, 12 }, { 2, 16 },
+		{ 4, 13 }, { 5, 17 }, { 6, 1 },  { 7, 14 }, { 8, 18 },
+		{ 12, 2 },
+		{ 15, 3 }, { 16, 4 }, { 17, 8 }, { 18, 5 }, { 19, 9 }, { 20, 6 }, { 21, 10 }
+	};
+
+	mat out = zeros(24,3);
+	for (int k = 0; k < 16; ++k) {
+		const int row = row_from_joint[k][0];
+		const Joint &src = prejoint[row_from_joint[k][1]];
+		out(row, 0) = src.Position.X;
+		out(row, 1) = src.Position.Y;
+		out(row, 2) = src.Position.Z;
+	}
+	return out;
+}
\ No newline at end of file
diff --git a/generateBody.cpp b/generateBody.cpp
new file mode 100644
index 0000000..33d3500
--- /dev/null
+++ b/generateBody.cpp
@@ -0,0 +1,439 @@
+#include"generateBody.h"
+
+
+
+// Copies each row of v into body: columns 0-2 become a homogeneous vertex
+// (w = 1), columns 3-5 become the normal. body.vertices / body.normals are
+// written by index, so they must already hold at least v.n_rows entries.
+void arma_to_GL(mat &v, Mesh &body) {
+	const uword row_count = v.n_rows;
+	for (uword r = 0; r < row_count; ++r)
+	{
+		const glm::vec4 position(v(r, 0), v(r, 1), v(r, 2), 1.0f);
+		const glm::vec3 normal(v(r, 3), v(r, 4), v(r, 5));
+		body.vertices[r] = position;
+		body.normals[r] = normal;
+	}
+
+}
+
+// Appends one vertex (columns 0-2, w = 1) and one normal (columns 3-5) per
+// row of v. The output vectors are not cleared first.
+void arma_to_vector(mat &v, Vec4s &vertices, Vec3s &normals) {
+	const uword row_count = v.n_rows;
+	for (uword r = 0; r < row_count; ++r)
+	{
+		const glm::vec4 position(v(r, 0), v(r, 1), v(r, 2), 1.0f);
+		const glm::vec3 normal(v(r, 3), v(r, 4), v(r, 5));
+		vertices.push_back(position);
+		normals.push_back(normal);
+	}
+
+}
+
+// Loads every model matrix for the requested gender from
+// "models/<gender>/*.txt" and derives the bookkeeping tables used later.
+// The return value of each load() is not checked.
+SMPL::SMPL(int gender) {
+	idd[NEUTRAL] = "neutral";
+	idd[MALE] = "male";
+	idd[FEMALE] = "female";
+
+	const string basedir = "models";
+	const string genderdir = idd[gender];
+	const string prefix = basedir + "/" + genderdir;
+
+	f.load(prefix + "/f.txt");
+	JJ.load(prefix + "/J.txt");
+	J_regressor.load(prefix + "/J_regressor.txt");
+	kintree_table.load(prefix + "/kintree_table.txt");
+	posedirs.load(prefix + "/posedirs.txt");
+	shapedirs.load(prefix + "/shapedirs.txt");
+	v_template.load(prefix + "/v_template.txt");
+	weights.load(prefix + "/weights.txt");
+	n_template.load(prefix + "/n_template.txt");
+	kpart.load(prefix + "/kpart.txt");
+	Kinect_J_template.load(prefix + "/Kinect_J_template.txt");
+
+	// sizes are taken from the loaded matrices
+	pose_num = weights.n_cols;
+	shape_num = shapedirs.n_cols;
+	vert_num = v_template.n_rows;
+	face_num = f.n_rows;
+
+	// NOTE(review): self-assignment kept from the original; it looks like a
+	// leftover of a sign flip on the X column - confirm whether one was intended
+	Kinect_J_template.col(0) = Kinect_J_template.col(0);
+
+	// row 1 of kintree_table holds the joint id of each column
+	const uword joint_cols = kintree_table.n_cols;
+	for (int c = 0; c < joint_cols; c++) {
+		id_to_col[kintree_table(1, c)] = c;
+	}
+	// row 0 holds the parent id; the first column is skipped
+	for (int c = 1; c < joint_cols; c++) {
+		parent[c] = id_to_col[kintree_table(0, c)];
+	}
+
+	// identity blocks, one per column of weights except the first
+	mat identity3;
+	identity3 = eye(3, 3);
+	R0 = repmat(identity3, 1, pose_num - 1);
+}
+
+// Returns A with the row [0 0 0 1] appended underneath.
+mat SMPL::with_zeros(mat &A) {
+	mat bottom_row = zeros(1, 4);
+	bottom_row(0, 3) = 1;
+	return join_vert(A, bottom_row);
+}
+
+// Returns a 4x4 matrix whose first three columns are zero and whose last
+// column is A reshaped to 4x1.
+mat SMPL::pack(mat &A) {
+	mat last_col = A;
+	last_col.reshape(4, 1);
+	const mat zero_cols = zeros(4, 3);
+	return join_horiz(zero_cols, last_col);
+}
+
+// Rodrigues formula: converts the axis-angle vector w (first three
+// elements) into a 3x3 rotation matrix. A vector whose length is not above
+// the tiny threshold maps to the identity.
+mat SMPL::Exp(mat &w) {
+	const double angle = sqrt(w(0)*w(0) + w(1)*w(1) + w(2)*w(2));
+	if (!(angle > 0.00000000000000000001)) {
+		return eye(3, 3);
+	}
+
+	const mat unit = w / angle;
+
+	// skew-symmetric cross-product matrix of the unit axis
+	mat skew = zeros(3, 3);
+	skew(0, 1) = -unit[2];
+	skew(0, 2) = unit[1];
+	skew(1, 0) = unit[2];
+	skew(1, 2) = -unit[0];
+	skew(2, 0) = -unit[1];
+	skew(2, 1) = unit[0];
+
+	return eye(3, 3) + sin(angle)*skew + (1.0 - cos(angle))*(skew*skew);
+}
+
+// Flattens every matrix in res into one row (Armadillo reshape order) and
+// stacks the rows. The row length is taken from res[0], so res must not be
+// empty.
+mat SMPL::vector_to_mat(vector<mat> res) {
+	const uword flat_len = res[0].n_rows * res[0].n_cols;
+	const uword count = res.size();
+
+	mat stacked = zeros(count, flat_len);
+	for (uword k = 0; k < count; ++k) {
+		mat flat = res[k];
+		flat.reshape(1, flat_len);
+		stacked.row(k) = flat;
+	}
+	return stacked;
+
+}
+
+// Unnormalized normal of the plane through j1, j2, j3: (j2 - j1) x (j3 - j1).
+mat SMPL::compute_n(mat j1, mat j2, mat j3) {
+	const mat edge_a = j2 - j1;
+	const mat edge_b = j3 - j1;
+	mat plane_normal = cross(edge_a, edge_b);
+	return plane_normal;
+}
+
+// Axis-angle vector built from the two directions x_t and x: the axis is
+// -(x cross x_t) normalized, the magnitude is the angle between them. Small
+// epsilons keep both divisions away from zero.
+mat SMPL::compute_t(mat x_t, mat x) {
+	mat rot_axis = -cross(x, x_t);
+	rot_axis = rot_axis / (norm(rot_axis) + 0.0000000000000001);
+
+	const double projection = dot(x, x_t);
+	const double angle = acos(projection / (norm(x_t)*norm(x) + 0.00000000001));
+
+	rot_axis = rot_axis * angle;
+	return rot_axis;
+
+}
+
+// Converts a 3x3 rotation matrix into a 1x3 axis-angle row vector. The
+// trace is limited to [-1, 3] before acos so rounding noise cannot push the
+// argument out of range.
+mat SMPL::R_to_t(mat R) {
+	double tr = trace(R);
+	tr = (tr > 3.0) ? 3.0 : ((tr < -1.0) ? -1.0 : tr);
+
+	const double theta = acos((tr - 1.0) / 2.);
+
+	// antisymmetric part of R, which is proportional to the rotation axis
+	mat axis(1, 3);
+	axis(0, 0) = R(2, 1) - R(1, 2);
+	axis(0, 1) = R(0, 2) - R(2, 0);
+	axis(0, 2) = R(1, 0) - R(0, 1);
+
+	const double scale = abs(theta) / (2 * sin(abs(theta)) + 0.00000000001);
+	axis = scale*axis;
+	return axis;
+}
+
+// Estimates a pose matrix (one axis-angle row per row of kpart) from a
+// matrix of joint positions J, by comparing J against Kinect_J_template.
+//
+// Steps, as implemented below:
+//   1. R[0]: rotation taking the template plane normal of rows 0/16/17 to
+//      the same normal measured on J.
+//   2. R[i], i >= 1: rotation taking the template segment
+//      kpart(i,0) -> kpart(i,1) to the same segment in J. A zero 3x3 matrix
+//      is stored as a "no rotation available" marker when kpart(i,1) is -1
+//      or when either end point of the segment is (numerically) the origin.
+//   3. Every usable R[i] is made relative to the nearest ancestor (via row 0
+//      of kintree_table) that has a usable rotation, then converted back to
+//      axis-angle.
+//   4. Columns 0 and 2 of the result are negated.
+// NOTE(review): the negation in step 4 is presumably a coordinate-system
+// conversion between the Kinect frame and the model frame - confirm.
+mat SMPL::J_to_pose(mat J) {
+	// NOTE(review): self-assignment, has no effect on J
+	J.col(0) = J.col(0);
+	vector<mat> R;
+	//R.push_back(eye(3, 3));
+	mat n_t = compute_n(Kinect_J_template.row(0), Kinect_J_template.row(16), Kinect_J_template.row(17));
+	mat n = compute_n(J.row(0), J.row(16), J.row(17));
+	mat ax = compute_t(n_t, n);
+	R.push_back(Exp(ax));
+	for (int i = 1; i < kpart.n_rows; i++) {
+		int k = int(kpart(i, 1));
+		if (k == -1) {
+			// no segment defined for this row: store the zero marker
+			R.push_back(zeros(3, 3));
+		}
+		else {
+			mat j_t = Kinect_J_template.row(kpart(i, 1)) - Kinect_J_template.row(kpart(i, 0));
+			mat j1 = J.row(kpart(i, 0));
+			mat j2 = J.row(kpart(i, 1));
+			// both end points must be away from the origin to be used
+			if (norm(j1 - zeros(1, 3)) > 0.000001&& norm(j2 - zeros(1, 3)) > 0.000001) {
+				mat j = j2 - j1;
+				mat axis = compute_t(j_t, j);
+				R.push_back(Exp(axis));
+			}
+			else {
+				R.push_back(zeros(3, 3));
+			}
+		}
+	}
+	mat t = zeros(kpart.n_rows, 3);
+	t.row(0) = R_to_t(R[0]);
+	int pp;
+	mat rel_R, tmp;
+	for (int i = 1; i < kpart.n_rows; i++) {
+		if (norm(R[i] - zeros(3, 3)) < 0.00000001) {
+			// marker entry: leave this row of the pose at zero
+			t.row(i) = zeros(1, 3);
+		}
+		else {
+			// climb the kinematic tree until an ancestor with a usable
+			// rotation is found; R[0] comes from Exp() and is never the
+			// zero marker, so the climb stops there at the latest
+			pp = kintree_table(0, i);
+			while (norm(R[pp] - zeros(3, 3)) < 0.00000001) {
+				pp = kintree_table(0, pp);
+
+			}
+			// rotation of i expressed relative to that ancestor
+			tmp = trans(R[pp]) *R[i];
+
+			t.row(i) = R_to_t(tmp);
+
+
+		}
+	}
+	// result columns: (-x, y, -z)
+	mat t_new = join_horiz(-t.col(0), t.col(1));
+	t_new = join_horiz(t_new, -t.col(2));
+
+
+	return t_new;
+}
+
+// Builds one 4x4 transform per joint by chaining each joint's local
+// rotation (Exp of its pose row) and its offset from its parent down the
+// kinematic tree, then subtracts the transformed joint position from the
+// translation column. Returns the transforms flattened to one row each (see
+// vector_to_mat).
+//
+// pose : one axis-angle row per joint.
+// J    : one joint position row per joint.
+mat SMPL::global_rigid_transformation(mat &pose, mat &J) {
+
+	vector<mat> results, results2;
+
+	// first joint: its own rotation, translated to its position J.row(0)
+	mat zero3 = J.row(0);
+	zero3.reshape(3, 1);
+	mat first = pose.row(0);
+	first = Exp(first);
+	first = join_horiz(first, zero3);
+	first = with_zeros(first);
+	results.push_back(first);
+	mat tmp, tmp_pose, tmp_j;
+
+	// remaining joints: local [R | offset from parent], pre-multiplied by the
+	// parent's accumulated transform. results[parent[i]] must already exist,
+	// i.e. parents are expected to come before their children.
+	for (int i = 1; i < kintree_table.n_cols; i++) {
+		tmp_pose = pose.row(i);
+		tmp_pose = Exp(tmp_pose);
+		tmp_j = J.row(i) - J.row(parent[i]);
+		tmp_j.reshape(3, 1);
+		tmp = join_horiz(tmp_pose, tmp_j);
+		tmp = with_zeros(tmp);
+		tmp = results[parent[i]] * tmp;
+		results.push_back(tmp);
+	}
+	int i = 0;
+	for (auto it = results.begin(); it < results.end(); it++) {
+		// [J_i; 0] pushed through the joint's transform, packed into the last
+		// column of a 4x4 and subtracted from the transform
+		tmp = zeros(4, 1);
+		tmp_j = J.row(i);
+		tmp_j.reshape(3, 1);
+		tmp.rows(0, 2) = tmp_j;
+		tmp = (*it)*tmp;
+		tmp = pack(tmp);
+		results2.push_back(*it - tmp);
+
+		i++;
+	}
+	mat A = vector_to_mat(results2);
+	return A;
+
+}
+
+
+// Applies the blended per-vertex transforms to the vertices v and,
+// optionally, to the template normals.
+//
+// pose, J   : forwarded to global_rigid_transformation().
+// v         : one vertex per row (x, y, z).
+// want_norm : when true, three extra columns with unit normals are appended.
+// Returns an n x 3 matrix of transformed vertices, or n x 6 with normals.
+//
+// T = weights * A holds one flattened 4x4 matrix per vertex. Because the
+// flattening follows Armadillo's reshape order, columns 0/4/8/12 of T are
+// the first row of that 4x4, columns 1/5/9/13 the second row and columns
+// 2/6/10/14 the third row.
+mat SMPL::verts_core(mat &pose, mat &v, mat &J, bool want_norm) {
+	mat A = global_rigid_transformation(pose, J);
+	mat T = weights*A;
+
+	mat temp_v = trans(v);
+	mat one_row = ones(1, v.n_rows);
+
+	// homogeneous vertices, one per column: rows are x, y, z, 1
+	mat rest_shape = join_vert(temp_v, one_row);
+	// transformed x
+	mat sum1 = T.col(0) % trans(rest_shape.row(0))
+		+ T.col(4) % trans(rest_shape.row(1))
+		+ T.col(8) % trans(rest_shape.row(2))
+		+ T.col(12) % trans(rest_shape.row(3));
+	// transformed y
+	mat sum2 = T.col(1) % trans(rest_shape.row(0))
+		+ T.col(5) % trans(rest_shape.row(1))
+		+ T.col(9) % trans(rest_shape.row(2))
+		+ T.col(13) % trans(rest_shape.row(3));
+	// transformed z
+	mat sum3 = T.col(2) % trans(rest_shape.row(0))
+		+ T.col(6) % trans(rest_shape.row(1))
+		+ T.col(10) % trans(rest_shape.row(2))
+		+ T.col(14) % trans(rest_shape.row(3));
+	// the fourth (homogeneous) component is not needed and stays disabled
+	/*mat sum4 = T.col(3) % trans(rest_shape.row(0))
+	+ T.col(7) % trans(rest_shape.row(1))
+	+ T.col(11) % trans(rest_shape.row(2))
+	+ T.col(15) % trans(rest_shape.row(3));
+	*/
+	mat sum = join_horiz(sum1, sum2);
+	sum = join_horiz(sum, sum3);
+
+	mat result = sum;
+	if (want_norm) {
+		// normals use w = 0, so the translation column does not affect them
+		mat zero_row = zeros(1, v.n_rows);
+		mat n_ref = join_vert(trans(n_template), zero_row);
+		mat n_sum1 = T.col(0) % trans(n_ref.row(0))
+			+ T.col(4) % trans(n_ref.row(1))
+			+ T.col(8) % trans(n_ref.row(2))
+			+ T.col(12) % trans(n_ref.row(3));
+		mat n_sum2 = T.col(1) % trans(n_ref.row(0))
+			+ T.col(5) % trans(n_ref.row(1))
+			+ T.col(9) % trans(n_ref.row(2))
+			+ T.col(13) % trans(n_ref.row(3));
+		mat n_sum3 = T.col(2) % trans(n_ref.row(0))
+			+ T.col(6) % trans(n_ref.row(1))
+			+ T.col(10) % trans(n_ref.row(2))
+			+ T.col(14) % trans(n_ref.row(3));
+		mat n_sum = join_horiz(n_sum1, n_sum2);
+		n_sum = join_horiz(n_sum, n_sum3);
+		// divide every normal by its length; a zero-length normal is not guarded
+		mat mm = n_sum.col(0) % n_sum.col(0) + n_sum.col(1) % n_sum.col(1) + n_sum.col(2) % n_sum.col(2);
+		mm = pow(mm, 0.5);
+		n_sum = n_sum / repmat(mm, 1, 3);
+		result = join_horiz(sum, n_sum);
+	}
+	return result;
+
+}
+
+// Writes v as a Wavefront OBJ file: a "v" line per row (columns 0-2), a
+// "vn" line per row when v has exactly six columns (columns 3-5), and an
+// "f" line per row of the face matrix with indices shifted to 1-based.
+void SMPL::write_to_obj(mat &v, string fname) {
+	ofstream out(fname);
+
+	for (uword r = 0; r < v.n_rows; ++r)
+	{
+		out << "v ";
+		for (uword c = 0; c < 3; ++c) {
+			out << v(r, c) << " ";
+		}
+		out << endl;
+	}
+
+	const bool has_normals = (v.n_cols == 6);
+	if (has_normals) {
+		for (uword r = 0; r < v.n_rows; ++r)
+		{
+			out << "vn ";
+			for (uword c = 3; c < 6; ++c) {
+				out << v(r, c) << " ";
+			}
+			out << endl;
+		}
+	}
+
+	// OBJ face indices start at 1
+	const mat one_based = f + 1;
+	for (uword r = 0; r < f.n_rows; ++r)
+	{
+		out << "f ";
+		for (uword c = 0; c < f.n_cols; ++c) {
+			out << one_based(r, c) << " ";
+		}
+		out << endl;
+	}
+	out.close();
+}
+
+// Poses the template mesh (no shape coefficients): adds the pose-dependent
+// vertex offsets derived from posedirs to v_template, then skins the result
+// with verts_core().
+mat SMPL::gen_pose_model(mat &pose, bool want_norm) {
+	mat joints = J_regressor*v_template;
+
+	// transposed rotation of every pose row after the first, side by side
+	mat rot_stack = zeros(3, (pose.n_rows - 1) * 3);
+	for (int j = 0; j < pose.n_rows - 1; j++) {
+		mat axis_angle = pose.row(j + 1);
+		axis_angle = Exp(axis_angle);
+		rot_stack.cols(3 * j, 3 * j + 2) = trans(axis_angle);
+	}
+
+	// deviation from the identity blocks in R0 drives the pose blend shapes
+	mat offsets = rot_stack - R0;
+	offsets.reshape(1, 9 * (pose_num - 1));
+	offsets = offsets*trans(posedirs);
+	offsets.reshape(3, vert_num);
+	offsets = trans(offsets);
+
+	mat posed = offsets + v_template;
+	return verts_core(pose, posed, joints, want_norm);
+
+}
+
+// Generates the mesh for a pose and a set of shape coefficients: the shape
+// offsets (shapedirs * betas) are added to the template first, joints are
+// regressed from the shaped mesh, then the pose-dependent offsets are added
+// and the result is skinned with verts_core().
+mat SMPL::gen_full_model(mat &pose, mat &betas, bool want_norm) {
+	mat shape_offsets = shapedirs*betas;
+	shape_offsets.reshape(3, vert_num);
+	shape_offsets = trans(shape_offsets);
+
+	mat shaped = v_template + shape_offsets;
+	mat joints = J_regressor*shaped;
+
+	// transposed rotation of every pose row after the first, side by side
+	mat rot_stack = zeros(3, (pose.n_rows - 1) * 3);
+	for (int j = 0; j < pose.n_rows - 1; j++) {
+		mat axis_angle = pose.row(j + 1);
+		axis_angle = Exp(axis_angle);
+		rot_stack.cols(3 * j, 3 * j + 2) = trans(axis_angle);
+	}
+
+	mat pose_offsets = rot_stack - R0;
+	pose_offsets.reshape(1, 9 * (pose_num - 1));
+	pose_offsets = pose_offsets*trans(posedirs);
+	pose_offsets.reshape(3, vert_num);
+	pose_offsets = trans(pose_offsets);
+
+	shaped = shaped + pose_offsets;
+	return verts_core(pose, shaped, joints, want_norm);
+}
+
+
+// Converts the joint matrix JJ into a pose, generates the posed mesh with
+// normals and writes it into body via arma_to_GL().
+//
+// JJ   : joint positions, forwarded to SMPL::J_to_pose().
+// body : receives vertices and normals (must already be large enough, see
+//        arma_to_GL()).
+// obj  : the loaded model.
+//
+// Prints two "clock: <ticks>" lines and "DONE" to stdout, as before. The
+// unused per-call product J_regressor * v_template (and the two matrix
+// copies feeding it) has been removed; it only cost time.
+void genBody(mat JJ, Mesh &body, SMPL &obj)
+{
+	// two back-to-back reads: the first timing line is kept for output
+	// compatibility even though nothing runs between them any more
+	const clock_t start = clock();
+	const clock_t point1 = clock();
+	cout << "clock: " << point1 - start << endl;
+
+	const clock_t point2 = clock();
+
+	mat pose = obj.J_to_pose(JJ);
+	mat result = obj.gen_pose_model(pose, true);
+
+	const clock_t point3 = clock();
+	cout << "clock: " << point3 - point2 << endl;
+	cout << "DONE" << endl;
+
+	arma_to_GL(result, body);
+}
+
+
+// Converts the joint matrix JJ into a pose, generates the posed mesh with
+// normals and appends it to vertices / normals.
+//
+// JJ       : joint positions, forwarded to SMPL::J_to_pose().
+// vertices : receives one vec4 per mesh vertex (appended, not cleared).
+// normals  : receives one vec3 per mesh vertex (appended, not cleared).
+// obj      : the loaded model.
+//
+// The unused per-call product J_regressor * v_template, the two matrix
+// copies feeding it and an unused clock() read have been removed.
+void genBodyVector(mat JJ, Vec4s &vertices, Vec3s &normals, SMPL &obj)
+{
+	mat pose = obj.J_to_pose(JJ);
+	mat result = obj.gen_pose_model(pose, true);
+	arma_to_vector(result, vertices, normals);
+}
+
+// Generates the posed mesh (with normals) for an explicit pose matrix and
+// replaces the contents of vertices / normals with it.
+void genFirstBody(mat pose, Vec4s &vertices, Vec3s &normals, SMPL &obj)
+{
+	mat posed_mesh = obj.gen_pose_model(pose, true);
+
+	// start from empty outputs: arma_to_vector() only appends
+	vertices.clear();
+	normals.clear();
+	arma_to_vector(posed_mesh, vertices, normals);
+}
\ No newline at end of file
diff --git a/generateBody.h b/generateBody.h
new file mode 100644
index 0000000..782cad2
--- /dev/null
+++ b/generateBody.h
@@ -0,0 +1,61 @@
+#pragma once
+
+#include <iostream>
+#include <armadillo>
+#include <vector>
+#include <string>
+#include <fstream>
+#include <map>
+#include <time.h>
+#include <glm/glm.hpp>
+
+#include "Common.h"
+#include "Mesh.h"
+#include "ObjLoader.h"
+
+//#pragma comment(lib, "libmat.lib")
+//#pragma comment(lib,"libmx.lib")
+//#pragma comment(lib, "libmex.lib  ")
+//#pragma comment(lib,"libeng.lib ")
+#pragma comment(lib, "blas_win64_MT.lib  ")
+#pragma comment(lib,"lapack_win64_MT.lib ")
+
+using namespace std;
+using namespace arma;
+
+#define NEUTRAL 0
+#define MALE 1
+#define FEMALE -1
+
+// Body model loaded from text matrices under "models/<gender>/" (see the
+// constructor in generateBody.cpp). Produces posed / shaped vertex matrices
+// and converts joint positions into a pose.
+class SMPL {
+public:
+	// Model data, each loaded from the file of the same name (JJ from J.txt).
+	mat f, JJ, J_regressor, kintree_table, posedirs, shapedirs, v_template, weights, n_template, Kinect_J_template;
+	// Sizes derived from the loaded matrices in the constructor.
+	// NOTE(review): joints_num is not assigned by the constructor in generateBody.cpp - confirm before reading it.
+	uword joints_num, pose_num, shape_num, vert_num, face_num;
+	// gender is one of NEUTRAL, MALE, FEMALE and selects the model directory.
+	SMPL(int gender = NEUTRAL);
+	// Writes v (and normals when v has six columns) plus the faces f as an OBJ file.
+	void write_to_obj(mat &v, string fname);
+	// Posed template mesh; with want_norm the result has three extra normal columns.
+	mat gen_pose_model(mat &pose, bool want_norm = false);
+	// Posed mesh with shape coefficients applied.
+	mat gen_full_model(mat &pose, mat &shape, bool want_norm = false);
+
+	// Estimates a pose matrix from joint positions J.
+	mat J_to_pose(mat J);
+	// Axis-angle vector -> 3x3 rotation matrix.
+	mat Exp(mat &w);
+	// Flattens each matrix to a row and stacks the rows.
+	mat vector_to_mat(vector<mat> res);
+	// (j2 - j1) x (j3 - j1).
+	mat compute_n(mat j1, mat j2, mat j3);
+	// Axis-angle vector built from the directions x_t and x.
+	mat compute_t(mat x_t, mat x);
+	// 3x3 rotation matrix -> 1x3 axis-angle row.
+	mat R_to_t(mat R);
+	// Per-joint 4x4 transforms, one flattened transform per row.
+	mat global_rigid_transformation(mat &pose, mat &J);
+	// Applies the blended transforms to the vertices v (and to the normals on request).
+	mat verts_core(mat &pose, mat &v, mat &J, bool want_norm = false);
+	// NOTE(review): generateBody.cpp defines arma_to_GL only as a free function;
+	// no definition of this member is visible there - confirm it is needed.
+	void arma_to_GL(mat &v, Mesh &body);
+private:
+	// Loaded from kpart.txt; rows are read as (from, to) joint index pairs in J_to_pose.
+	mat kpart;
+	// gender constant -> directory name
+	map<int, string> idd;
+	// id_to_col: joint id -> column of kintree_table; parent: column -> parent column
+	map<int, int> id_to_col, parent;
+	// Identity 3x3 blocks repeated (pose_num - 1) times; subtracted from the
+	// stacked rotations when computing pose offsets.
+	mat R0;
+	// Appends the row [0 0 0 1] to A.
+	mat with_zeros(mat &A);
+	// 4x4 matrix with A (reshaped to 4x1) as its last column, zeros elsewhere.
+	mat pack(mat &A);
+};
+
+// Joint positions -> posed mesh written into body (also prints timing to stdout).
+void genBody(mat JJ, Mesh &body,SMPL &obj);
+// Joint positions -> posed mesh appended to vertices / normals.
+void genBodyVector(mat JJ, Vec4s &vertices, Vec3s &normals, SMPL &obj);
+// Copies rows of v (x y z nx ny nz) into body.vertices / body.normals by index.
+void arma_to_GL(mat &v, Mesh &body);
+// Appends rows of v (x y z nx ny nz) to vertices / normals.
+void arma_to_vector(mat &v, Vec4s &vertices, Vec3s &normals);
+// Explicit pose -> posed mesh; vertices / normals are cleared and refilled.
+void genFirstBody(mat pose, Vec4s &vertices, Vec3s &normals, SMPL &obj);
diff --git a/kdtree.cpp b/kdtree.cpp
new file mode 100644
index 0000000..69f28a1
--- /dev/null
+++ b/kdtree.cpp
@@ -0,0 +1,949 @@
+
+// Header files
+
+#include <stdio.h>  
+#include <stdlib.h>  
+#include <string.h>  
+#include <math.h>  
+#include "kdtree.h"  
+
+
+//#if defined(WIN32) || defined(__WIN32__)  
+#include <malloc.h>  
+//#endif  
+  
+/* The list-node allocator needs pthreads to be thread-safe. Building it
+ * without pthreads is refused unless I_WANT_THREAD_BUGS is defined. */
+#ifdef USE_LIST_NODE_ALLOCATOR  
+  
+#ifndef NO_PTHREADS  
+#include <pthread.h>  
+#else  
+  
+#ifndef I_WANT_THREAD_BUGS  
+#error "You are compiling with the fast list node allocator, with pthreads disabled! This WILL break if used from multiple threads."  
+#endif  /*I want thread bugs */  
+  
+#endif  /*pthread support */  
+#endif  /*use list node allocator */  
+  
+  
+// Axis-aligned hyper-rectangle: for each of the dim dimensions it stores the
+// minimum and the maximum coordinate.
+struct kdhyperrect {  
+    int dim;  
+    double *min, *max;              /*minimum/maximum coords */  
+};  
+  
+// One node of the kd-tree.
+struct kdnode {  
+    float *pos;                     /* coordinates of the stored point (allocated in insert_rec) */  
+    int dir;                        /* index of the coordinate this node splits on */  
+    void *data;                     /* caller-supplied payload; passed to the tree's destr callback on clear */  
+  
+    struct kdnode *left, *right;    /*negative/positive side */  
+};  
+  
+// One entry of a result list: the matching tree node, a squared-distance
+// value for it, and the link to the next entry (singly linked).
+struct res_node {  
+    struct kdnode *item;  
+    double dist_sq;  
+    struct res_node *next;  
+};  
+  
+// The tree itself: dimensionality, root node, bounding hyper-rectangle of
+// all inserted points (null until the first insert) and an optional callback
+// that is invoked on each node's data pointer when nodes are freed.
+struct kdtree {  
+    int dim;  
+    struct kdnode *root;  
+    struct kdhyperrect *rect;  
+    void (*destr)(void*);  
+};  
+  
+// Result set of a query: the tree it belongs to, a list of res_node entries
+// and an entry count.
+// NOTE(review): rlist is presumably the list head and riter the current
+// iteration position - confirm against the code that fills and walks it.
+struct kdres {  
+    struct kdtree *tree;  
+    struct res_node *rlist, *riter;  // list head / iterator (see note above)  
+    int size;  
+};  
+  
+//����ƽ���ĺ궨��,�൱�ں���  
+#define SQ(x)           ((x) * (x))  
+  
+  
+static void clear_rec(struct kdnode *node, void (*destr)(void*));  
+static int insert_rec(struct kdnode **node, const double *pos, void *data, int dir, int dim);  
+static int rlist_insert(struct res_node *list, struct kdnode *item, double dist_sq);  
+static void clear_results(struct kdres *set);  
+  
+static struct kdhyperrect *hyperrect_create(int dim, const double *min, const double *max);  
+static void hyperrect_free(struct kdhyperrect *d_rect);
+static struct kdhyperrect *hyperrect_duplicate(const struct kdhyperrect *rect);  
+static void hyperrect_extend(struct kdhyperrect *rect, const double *pos);  
+static double hyperrect_dist_sq(struct kdhyperrect *rect, const double *pos);  
+  
+#ifdef USE_LIST_NODE_ALLOCATOR  
+static struct res_node *alloc_resnode(void);  
+static void free_resnode(struct res_node*);  
+#else  
+#define alloc_resnode()     malloc(sizeof(struct res_node))  
+#define free_resnode(n)     free(n)  
+#endif  
+  
+  
+// Allocates an empty k-dimensional tree (no root, no bounding rectangle, no
+// data destructor). Returns 0 when the allocation fails.
+struct kdtree *kd_create(int k)
+{
+    struct kdtree *fresh = (kdtree*)malloc(sizeof(struct kdtree));
+    if(fresh == 0) {
+        return 0;
+    }
+
+    fresh->dim = k;
+    fresh->root = 0;
+    fresh->rect = 0;
+    fresh->destr = 0;
+    return fresh;
+}
+  
+// Releases a tree together with all of its nodes; a null tree is ignored.
+void kd_free(struct kdtree *tree)
+{
+    if(!tree) {
+        return;
+    }
+    kd_clear(tree);
+    free(tree);
+}
+  
+// Frees the subtree rooted at node, children first. When destr is non-null
+// it is called with each node's data pointer before the node is released.
+static void clear_rec(struct kdnode *node, void (*destr)(void*))
+{
+    if(node) {
+        clear_rec(node->left, destr);
+        clear_rec(node->right, destr);
+
+        if(destr) {
+            destr(node->data);
+        }
+        free(node->pos);
+        free(node);
+    }
+}
+  
+// Removes every node from the tree and drops its bounding rectangle; the
+// tree object itself stays usable.
+void kd_clear(struct kdtree *tree)
+{
+    struct kdhyperrect *bounds;
+
+    clear_rec(tree->root, tree->destr);
+    tree->root = 0;
+
+    bounds = tree->rect;
+    if (!bounds) {
+        return;
+    }
+    hyperrect_free(bounds);
+    tree->rect = 0;
+}
+  
+// Registers the callback that is invoked on each node's data pointer when
+// nodes are removed (see clear_rec). Pass 0 to disable it.
+void kd_data_destructor(struct kdtree *tree, void (*destr)(void*))
+{  
+    // stored on the tree; used the next time nodes are cleared  
+    tree->destr = destr;  
+}  
+  
+  
+// Inserts a point into the subtree referenced by *nptr.
+//
+// nptr : address of the link (root pointer or a child pointer) to descend from.
+// pos  : dim coordinates of the point, as doubles.
+// data : caller payload stored on the new node.
+// dir  : splitting axis the new node gets if *nptr is empty.
+// dim  : dimensionality of the tree.
+// Returns 0 on success, -1 if memory could not be allocated.
+static int insert_rec(struct kdnode **nptr, const double *pos, void *data, int dir, int dim)
+{
+    int i, new_dir;
+    struct kdnode *node;
+
+    // empty slot: create the node here
+    if(!*nptr)
+    {
+        if(!(node = (kdnode *)malloc(sizeof *node)))
+        {
+            return -1;
+        }
+        if(!(node->pos = (float *)malloc(dim * sizeof *node->pos))) {
+            free(node);
+            return -1;
+        }
+        // node->pos holds floats while pos holds doubles, so each value must
+        // be converted; a raw memcpy would copy the wrong bytes
+        for(i = 0; i < dim; i++)
+        {
+            node->pos[i] = (float)pos[i];
+        }
+        node->data = data;
+        node->dir = dir;
+        node->left = node->right = 0;
+        *nptr = node;
+        return 0;
+    }
+
+    // occupied slot: go left when the point is below this node along its
+    // splitting axis, right otherwise; the child splits on the next axis
+    node = *nptr;
+    new_dir = (node->dir + 1) % dim;
+    if(pos[node->dir] < node->pos[node->dir]) {
+        return insert_rec(&(*nptr)->left, pos, data, new_dir, dim);
+    }
+    return insert_rec(&(*nptr)->right, pos, data, new_dir, dim);
+}
+  
+// Inserts the point pos (tree->dim doubles) with payload data, then grows
+// the tree's bounding rectangle to include it (creating the rectangle on
+// the first insert). Returns 0 on success, -1 if the node could not be
+// created.
+int kd_insert(struct kdtree *tree, const double *pos, void *data)
+{
+    if (insert_rec(&tree->root, pos, data, 0, tree->dim) != 0) {
+        return -1;
+    }
+
+    if (tree->rect != 0) {
+        hyperrect_extend(tree->rect, pos);
+    } else {
+        tree->rect = hyperrect_create(tree->dim, pos, pos);
+    }
+    return 0;
+}
+  
+// Float front end for kd_insert(): widens the tree->dim floats at pos into a
+// temporary double buffer and inserts that.
+//
+// Buffer choice: up to 16 dimensions use a function-static array; larger
+// sizes use alloca() up to 256 dimensions (unless NO_ALLOCA is defined) and
+// malloc() beyond that.
+// NOTE(review): the static buffer is shared by all calls, so concurrent
+// calls on trees with dim <= 16 would presumably race - confirm usage.
+// Returns the result of kd_insert(), or -1 if malloc() fails.
+int kd_insertf(struct kdtree *tree, const float *pos, void *data)
+{  
+    static double sbuf[16];  
+    double *bptr, *buf = 0;  
+    int res, dim = tree->dim;  
+  
+    // more than 16 dimensions: the static buffer is too small  
+    if(dim > 16)   
+    {  
+#ifndef NO_ALLOCA  
+        if(dim <= 256)  
+            bptr = buf = (double*)alloca(dim * sizeof *bptr);  
+        else  
+#endif  
+            if(!(bptr = buf = (double*)malloc(dim * sizeof *bptr)))   
+            {  
+                return -1;  
+            }  
+    }   
+    // 16 dimensions or fewer: use the static buffer  
+    else   
+    {  
+        bptr = buf = sbuf;  
+    }  
+  
+    // widen each float coordinate to double  
+    while(dim-- > 0)   
+    {  
+       * bptr++ = *pos++;  
+    }  
+  
+    // delegate to the double-precision insert  
+    res = kd_insert(tree, buf, data);
+#ifndef NO_ALLOCA  
+    if(tree->dim > 256)  
+#else  
+    if(tree->dim > 16)  
+#endif  
+        // only the malloc()ed buffer needs to be released  
+        free(buf);  
+    return res;  
+}  
+  
+// Convenience wrapper: inserts the 3-D point (x, y, z) via kd_insert().
+int kd_insert3(struct kdtree *tree, double x, double y, double z, void *data)
+{
+    double point[3] = { x, y, z };
+    return kd_insert(tree, point, data);
+}
+  
+// Convenience wrapper: widens the float point (x, y, z) to double and
+// inserts it via kd_insert().
+int kd_insert3f(struct kdtree *tree, float x, float y, float z, void *data)
+{
+    double point[3] = { x, y, z };
+    return kd_insert(tree, point, data);
+}
+  
+// Collects every node of the subtree at node whose distance to pos is at
+// most range.
+//
+// node    : subtree root (may be null).
+// pos     : query point, dim doubles.
+// range   : search radius.
+// list    : result list that matches are inserted into via rlist_insert().
+// ordered : non-zero passes the squared distance to rlist_insert(), zero
+//           passes -1.0 instead.
+// dim     : dimensionality.
+// Returns the number of nodes added, or -1 if an insertion failed.
+static int find_nearest(struct kdnode *node, const double *pos, double range, struct res_node *list, int ordered, int dim)
+{  
+    double dist_sq, dx;  
+    int i, ret, added_res = 0;
+  
+    if(!node) return 0;  // empty subtree: nothing to add  
+  
+    dist_sq = 0;  
+    // squared Euclidean distance between this node and the query point  
+    for(i=0; i<dim; i++)   
+    {  
+        dist_sq += SQ(node->pos[i] - pos[i]);  
+    }  
+    // within range: record this node  
+    if(dist_sq <= SQ(range))   
+    {         
+        if(rlist_insert(list, node, ordered ? dist_sq : -1.0) == -1)   
+        {  
+            return -1;  
+        }  
+        added_res = 1;
+    }  
+  
+    // signed distance from the query point to this node's splitting plane  
+    dx = pos[node->dir] - node->pos[node->dir];  
+  
+    // search the side of the plane that contains the query point first  
+    ret = find_nearest(dx <= 0.0 ? node->left : node->right, pos, range, list, ordered, dim);
+    // the other side can only hold matches if the plane is closer than range  
+    if(ret >= 0 && fabs(dx) < range)   
+    {  
+        added_res += ret;
+        ret = find_nearest(dx <= 0.0 ? node->right : node->left, pos, range, list, ordered, dim);
+    }  
+    if(ret == -1)   
+    {  
+        return -1;  
+    }  
+    added_res += ret;
+  
+    return added_res;
+}  
+  
+  
+/* Disabled (#if 0) draft of an N-nearest search that keeps candidates in a heap. NOTE(review): it cannot compile as written - names such as "fin nearest_n" and "adde res" look corrupted, `range_sq` is not declared in this function, `return =1` is malformed and the normal path ends without a return. */
+#if 0
+static int fin nearest_n(struct kdnode *node, const double *pos, double range, int num, struct rheap *heap, int dim)
+{
+    double dist_sq, dx;
+    int i, ret, adde res = 0;
+
+    if(!node) return 0;
+
+    /*if the photon is close enough, add it to the result heap */
+    /* start by accumulating the squared distance from this node to the query point */
+    dist_sq = 0;
+    /* sum of the squared per-axis differences */
+    for(i=0; i<dim; i++)
+    {
+        dist_sq += SQ(node->pos[i] - pos[i]);
+    }
+    /* the node is a candidate only when it is within the current squared radius */
+    if(dist_sq <= range_sq) {
+    /* the heap already holds at least `num` entries */
+        if(heap->size >= num)
+        {
+            /*get furthest element */
+            /* rheap_get_max() is expected to hand back that entry */
+            struct res_node *maxelem = rheap_get_max(heap);
+
+            /*and check if the new one is closer than that */
+            /* strict comparison: on a tie the existing entry is kept */
+            if(maxelem->dist_sq > dist_sq)
+            {
+            /* drop the furthest entry ... */
+                rheap_remove_max(heap);
+                /* ... and insert this node in its place */
+                if(rheap_insert(heap, node, dist_sq) == -1)
+                {
+                    return -1;
+                }
+                adde res = 1;
+
+                range_sq = dist_sq;
+            }
+        }
+        /* the heap is not full yet: insert unconditionally */
+        else
+        {
+            if(rheap_insert(heap, node, dist_sq) == -1)
+            {
+                return =1;
+            }
+            adde res = 1;
+        }
+    }
+
+
+    /*find signed distance from the splitting plane */
+    dx = pos[node->dir] - node->pos[node->dir];
+
+    ret = fin nearest_n(dx <= 0.0 ? node->left : node->right, pos, range, num, heap, dim);
+    if(ret >= 0 && fabs(dx) < range) {
+        adde res += ret;
+        ret = fin nearest_n(dx <= 0.0 ? node->right : node->left, pos, range, num, heap, dim);
+    }
+}
+#endif
+  
+  
+static void kd_nearest_i(struct kdnode *node, const double *pos, struct kdnode **result, double *result_dist_sq, struct kdhyperrect *rect)
+{  /* Recursive worker for kd_nearest(): leaves the closest node found in *result and its squared distance in *result_dist_sq; `rect` is sliced and restored in place. */
+    int dir = node->dir;
+    int i;
+    double dummy, dist_sq;
+    struct kdnode *nearer_subtree, *farther_subtree;
+    double *nearer_hyperrect_coord, *farther_hyperrect_coord;
+
+    /*Decide whether to go left or right in the tree */
+    /* pos[dir] <= node->pos[dir] makes the left child the nearer one, otherwise the right child */
+    dummy = pos[dir] - node->pos[dir];
+    if (dummy <= 0)
+    {
+        nearer_subtree = node->left;
+        farther_subtree = node->right;
+        nearer_hyperrect_coord = rect->max + dir;
+        farther_hyperrect_coord = rect->min + dir;
+    }
+    else
+    {
+        nearer_subtree = node->right;
+        farther_subtree = node->left;
+        nearer_hyperrect_coord = rect->min + dir;
+        farther_hyperrect_coord = rect->max + dir;
+    }
+
+    if (nearer_subtree) {
+        /*Slice the hyperrect to get the hyperrect of the nearer subtree */
+        dummy = *nearer_hyperrect_coord;
+       * nearer_hyperrect_coord = node->pos[dir];
+        /*Recurse down into nearer subtree */
+        kd_nearest_i(nearer_subtree, pos, result, result_dist_sq, rect);
+        /*Undo the slice */
+       * nearer_hyperrect_coord = dummy;
+    }
+
+    /*Check the distance of the point at the current node, compare it 
+     * with our best so far */
+    dist_sq = 0;
+    for(i=0; i < rect->dim; i++)
+    {
+        dist_sq += SQ(node->pos[i] - pos[i]);
+    }
+    if (dist_sq < *result_dist_sq)
+    {
+       * result = node;
+       * result_dist_sq = dist_sq;
+    }
+
+    if (farther_subtree) {
+        /*Get the hyperrect of the farther subtree */
+        dummy = *farther_hyperrect_coord;
+       * farther_hyperrect_coord = node->pos[dir];
+        /*Check if we have to recurse down by calculating the closest 
+         * point of the hyperrect and see if it's closer than our 
+         * minimum distance in result_dist_sq. */
+        if (hyperrect_dist_sq(rect, pos) < *result_dist_sq) {
+            /*Recurse down into farther subtree */
+            kd_nearest_i(farther_subtree, pos, result, result_dist_sq, rect);
+        }
+        /*Undo the slice on the hyperrect */
+       * farther_hyperrect_coord = dummy;
+    }
+}
+  
+/* Find the node nearest to `pos`. Returns a result set holding that single node (free it with kd_res_free()), or 0 on allocation failure or when `kd` or its bounding rectangle is missing. */
+struct kdres *kd_nearest(struct kdtree *kd, const double *pos)
+{
+    struct kdhyperrect *rect;
+    struct kdnode *result;
+    struct kdres *rset;
+    double dist_sq;
+    int i;
+
+    /* no tree, or no bounding hyperrectangle on it: nothing can be searched */
+    if (!kd) return 0;
+    if (!kd->rect) return 0;
+
+    /*Allocate result set */
+    /* the set itself, then the head node of its result list */
+    if(!(rset = (kdres*)malloc(sizeof *rset)))
+    {
+        return 0;
+    }
+    if(!(rset->rlist = (res_node*)alloc_resnode())) {
+        free(rset);
+        return 0;
+    }
+    rset->rlist->next = 0;
+    rset->tree = kd;
+
+    /*Duplicate the bounding hyperrectangle, we will work on the copy */
+    /* kd_nearest_i() slices the rectangle in place, so the tree's own rectangle is left untouched */
+    if (!(rect = hyperrect_duplicate(kd->rect)))
+    {
+        kd_res_free(rset);
+        return 0;
+    }
+
+    /*Our first guesstimate is the root node */
+    result = kd->root;
+    dist_sq = 0;
+    for (i = 0; i < kd->dim; i++)
+        dist_sq += SQ(result->pos[i] - pos[i]);
+
+    /*Search for the nearest neighbour recursively */
+    /* kd_nearest_i() refines `result` and `dist_sq` through the pointers passed here */
+    kd_nearest_i(kd->root, pos, &result, &dist_sq, rect);
+
+    /*Free the copy of the hyperrect */
+    /* the working copy is not needed once the search has finished */
+    hyperrect_free(rect);
+
+    /*Store the result */
+    /* unordered insert (key -1.0) of the single best node */
+    if (result)
+    {
+        if (rlist_insert(rset->rlist, result, -1.0) == -1)
+        {
+            kd_res_free(rset);
+            return 0;
+        }
+        rset->size = 1;
+        kd_res_rewind(rset);
+        return rset;
+    }
+    else
+    {
+        kd_res_free(rset);
+        return 0;
+    }
+}
+  
+/* float variant of kd_nearest(): copies tree->dim floats from `pos` into a double buffer and forwards the query. Returns 0 if the buffer cannot be allocated. */
+struct kdres *kd_nearestf(struct kdtree *tree, const float *pos)
+{
+    static double sbuf[16];  /* NOTE(review): static storage shared by every call with dim <= 16 - looks unsafe for concurrent callers; confirm */
+    double *bptr, *buf = 0;
+    int dim = tree->dim;     /* read before any NULL check on `tree`, unlike kd_nearest() */
+    struct kdres *res;
+
+    if(dim > 16) {
+#ifndef NO_ALLOCA
+        if(dim <= 256)
+            bptr = buf = (double*)alloca(dim * sizeof *bptr);
+        else
+#endif
+            if(!(bptr = buf = (double*)malloc(dim * sizeof *bptr))) {
+                return 0;
+            }
+    } else {
+        bptr = buf = sbuf;
+    }
+
+    while(dim-- > 0) {
+       * bptr++ = *pos++;
+    }
+    /* `dim` has been consumed by the copy loop, so the checks below re-read tree->dim */
+    res = kd_nearest(tree, buf);
+#ifndef NO_ALLOCA
+    if(tree->dim > 256)
+#else
+    if(tree->dim > 16)
+#endif
+        free(buf);           /* only the malloc()ed buffer is freed; sbuf and alloca() storage are not */
+    return res;
+}
+  
+/* Nearest-neighbour query with the query point given as three doubles. */
+struct kdres *kd_nearest3(struct kdtree *tree, double x, double y, double z)
+{
+    /* pack the coordinates in x, y, z order into the array form kd_nearest() takes */
+    const double query[3] = { x, y, z };
+
+    /* the result set (or 0) comes straight from kd_nearest() */
+    return kd_nearest(tree, query);
+}
+  
+/* Nearest-neighbour query with the query point given as three floats. */
+struct kdres *kd_nearest3f(struct kdtree *tree, float x, float y, float z)
+{
+    /* widen each float to double and pack in x, y, z order for kd_nearest() */
+    const double query[3] = { x, y, z };
+
+    /* the result set (or 0) comes straight from kd_nearest() */
+    return kd_nearest(tree, query);
+}
+  
+/*---- nearest N search ---- */  
+/*
+static kdres *kd_nearest_n(struct kdtree *kd, const double *pos, int num) 
+{ 
+    int ret; 
+    struct kdres *rset; 
+ 
+    if(!(rset = malloc(sizeof *rset))) { 
+        return 0; 
+    } 
+    if(!(rset->rlist = alloc_resnode())) { 
+        free(rset); 
+        return 0; 
+    } 
+    rset->rlist->next = 0; 
+    rset->tree = kd; 
+ 
+    if((ret = find_nearest_n(kd->root, pos, range, num, rset->rlist, kd->dim)) == -1) { 
+        kd_res_free(rset); 
+        return 0; 
+    } 
+    rset->size = ret; 
+    kd_res_rewind(rset); 
+    return rset; 
+}*/  
+  
+/* Collect every node within `range` of `pos` into a new, unordered result set; returns 0 on allocation failure. The set may be empty and must be released with kd_res_free(). */
+struct kdres *kd_nearest_range(struct kdtree *kd, const double *pos, double range)
+{
+    int ret;
+    struct kdres *rset;
+    /* allocate the set, then the head node of its result list */
+    if(!(rset = (kdres*)malloc(sizeof *rset))) {
+        return 0;
+    }
+    if(!(rset->rlist = (res_node*)alloc_resnode())) {
+        free(rset);
+        return 0;
+    }
+    rset->rlist->next = 0;
+    rset->tree = kd;
+    /* ordered = 0: matches are not sorted by distance; -1 signals that an insert failed part-way */
+    if((ret = find_nearest(kd->root, pos, range, rset->rlist, 0, kd->dim)) == -1) {
+        kd_res_free(rset);
+        return 0;
+    }
+    rset->size = ret;
+    kd_res_rewind(rset);
+    return rset;
+}
+  
+/* float variant of kd_nearest_range(): copies kd->dim floats from `pos` into a double buffer and forwards the query. Returns 0 if the buffer cannot be allocated. */
+struct kdres *kd_nearest_rangef(struct kdtree *kd, const float *pos, float range)
+{
+    static double sbuf[16];  /* NOTE(review): static storage shared by every call with dim <= 16 - looks unsafe for concurrent callers; confirm */
+    double *bptr, *buf = 0;
+    int dim = kd->dim;
+    struct kdres *res;
+
+    if(dim > 16) {
+#ifndef NO_ALLOCA
+        if(dim <= 256)
+            bptr = buf = (double*)alloca(dim * sizeof *bptr);
+        else
+#endif
+            if(!(bptr = buf = (double*)malloc(dim * sizeof *bptr))) {
+                return 0;
+            }
+    } else {
+        bptr = buf = sbuf;
+    }
+
+    while(dim-- > 0) {
+       * bptr++ = *pos++;
+    }
+    /* `dim` has been consumed by the copy loop, so the checks below re-read kd->dim */
+    res = kd_nearest_range(kd, buf, range);
+#ifndef NO_ALLOCA
+    if(kd->dim > 256)
+#else
+    if(kd->dim > 16)
+#endif
+        free(buf);           /* only the malloc()ed buffer is freed; sbuf and alloca() storage are not */
+    return res;
+}
+  
+/* Range query with the centre given as three doubles. */
+struct kdres *kd_nearest_range3(struct kdtree *tree, double x, double y, double z, double range)
+{
+    /* pack the centre in x, y, z order into the array form kd_nearest_range() takes */
+    const double centre[3] = { x, y, z };
+
+    /* the result set (or 0) comes straight from kd_nearest_range() */
+    return kd_nearest_range(tree, centre, range);
+}
+  
+/* Range query with the centre and radius given as floats. */
+struct kdres *kd_nearest_range3f(struct kdtree *tree, float x, float y, float z, float range)
+{
+    /* widen each float to double and pack in x, y, z order for kd_nearest_range() */
+    const double centre[3] = { x, y, z };
+
+    /* `range` is converted to double at the call; the result comes straight from kd_nearest_range() */
+    return kd_nearest_range(tree, centre, range);
+}
+  
+/* Free a result set: the result nodes first, then the list head, then the set itself. `rset` must not be NULL (it is dereferenced). */
+void kd_res_free(struct kdres *rset)
+{
+    clear_results(rset);        /* releases every node after the head and resets rlist->next to 0 */
+    free_resnode(rset->rlist);  /* the head node allocated when the set was created */
+    free(rset);
+}
+  
+/* Number of elements in the result set, as stored in its `size` field. */
+int kd_res_size(struct kdres *set)
+{
+    return set->size;
+}
+  
+/* Reset the iterator to the first element, i.e. the node after the list head (0 when the set is empty). */
+void kd_res_rewind(struct kdres *rset)
+{
+    rset->riter = rset->rlist->next;
+}
+  
+/* Returns 1 when the iterator has moved past the last element (riter is null), otherwise 0. */
+int kd_res_end(struct kdres *rset)
+{
+    return !rset->riter;
+}
+  
+/* Advance the iterator by one element. Returns non-zero while it still points at an element, zero once there are no more. */
+int kd_res_next(struct kdres *rset)
+{
+    if(rset->riter) rset->riter = rset->riter->next;  /* guard: advancing an empty or exhausted set used to dereference a null iterator */
+    return rset->riter != 0;
+}
+  
+/* Return the data pointer of the current result item, or 0 when the iterator is past the end. */
+void *kd_res_item(struct kdres *rset, double *pos)
+{
+    struct res_node *cur = rset->riter;
+
+    if(!cur) return 0;
+    /* `pos` is optional: when non-null it receives tree->dim doubles copied from the node's position */
+    if(pos) memcpy(pos, cur->item->pos, rset->tree->dim * sizeof *pos);
+
+    return cur->item->data;
+}
+  
+/* float variant of kd_res_item(): returns the current item's data pointer, or 0 when the iterator is past the end. */
+void *kd_res_itemf(struct kdres *rset, float *pos)
+{
+    struct res_node *cur = rset->riter;
+    int axis;
+
+    if(!cur) return 0;
+    /* `pos` is optional: when non-null it receives tree->dim coordinates, each converted from double to float */
+    if(pos) {
+        for(axis = 0; axis < rset->tree->dim; axis++)
+            pos[axis] = cur->item->pos[axis];
+    }
+    return cur->item->data;
+}
+  
+/* Current result item of a 3-D tree: stores its coordinates through x / y / z (each may be NULL to skip it) and returns its data pointer, or 0 when the iterator is past the end. */
+void *kd_res_item3(struct kdres *rset, double *x, double *y, double *z)
+{
+    if(!rset->riter) return 0;
+    /* Test the pointers themselves: the old `if(*x)` read through a possibly-null pointer and skipped the store whenever the caller's variable held 0. */
+    if(x) *x = rset->riter->item->pos[0];
+    if(y) *y = rset->riter->item->pos[1];
+    if(z) *z = rset->riter->item->pos[2];
+    return rset->riter->item->data;  /* was an unconditional `return 0`, contradicting the contract documented in kdtree.h */
+}
+  
+/* float variant of kd_res_item3(): stores the current item's coordinates through x / y / z (each may be NULL to skip it) and returns its data pointer, or 0 when the iterator is past the end. */
+void *kd_res_item3f(struct kdres *rset, float *x, float *y, float *z)
+{
+    if(!rset->riter) return 0;
+    /* Test the pointers themselves: the old `if(*x)` read through a possibly-null pointer and skipped the store whenever the caller's variable held 0. */
+    if(x) *x = rset->riter->item->pos[0];
+    if(y) *y = rset->riter->item->pos[1];
+    if(z) *z = rset->riter->item->pos[2];
+    return rset->riter->item->data;  /* was an unconditional `return 0`, contradicting the contract documented in kdtree.h */
+}
+  
+/* Data pointer of the current result item without copying its position: kd_res_item() with a null `pos`. */
+void *kd_res_item_data(struct kdres *set)
+{
+    return kd_res_item(set, 0);
+}
+  
+/*---- hyperrectangle helpers ---- */  
+/* Allocate a hyperrectangle of `dim` dimensions and copy `dim` doubles from each of `min` and `max` into it; returns 0 if any allocation fails. */
+static struct kdhyperrect *hyperrect_create(int dim, const double *min, const double *max)
+{
+    size_t size = dim * sizeof(double);  /* byte size of one bounds array */
+    struct kdhyperrect *rect = 0;
+
+    if (!(rect = (kdhyperrect*)malloc(sizeof(struct kdhyperrect))))
+    {
+        return 0;
+    }
+    /* on a later failure everything allocated so far is released before returning */
+    rect->dim = dim;
+    if (!(rect->min = (double*)malloc(size))) {
+        free(rect);
+        return 0;
+    }
+    if (!(rect->max = (double*)malloc(size))) {
+        free(rect->min);
+        free(rect);
+        return 0;
+    }
+    memcpy(rect->min, min, size);
+    memcpy(rect->max, max, size);
+
+    return rect;
+}
+  
+/* Release a hyperrectangle: both bounds arrays, then the struct itself. `rect` must not be NULL (it is dereferenced). */
+static void hyperrect_free(struct kdhyperrect *rect)
+{
+    free(rect->min);
+    free(rect->max);
+    free(rect);
+}
+  
+/* Copy of `rect` with its own bounds arrays, built by hyperrect_create(); 0 if that allocation fails. */
+static struct kdhyperrect *hyperrect_duplicate(const struct kdhyperrect *rect)
+{
+    return hyperrect_create(rect->dim, rect->min, rect->max);
+}
+  
+/* Grow the rectangle in place so that it also contains the point `pos` (rect->dim coordinates are read). */
+static void hyperrect_extend(struct kdhyperrect *rect, const double *pos)
+{
+    int axis;
+
+    for (axis = 0; axis < rect->dim; axis++) {
+        const double coord = pos[axis];
+
+        /* lower the minimum and/or raise the maximum on this axis when the point lies outside */
+        if (coord < rect->min[axis]) rect->min[axis] = coord;
+        if (coord > rect->max[axis]) rect->max[axis] = coord;
+    }
+    /* axes on which the point is already inside the bounds are left untouched */
+}
+  
+/* Squared distance from `pos` to the rectangle: the sum, over the axes on which `pos` lies outside the bounds, of the squared gap to the nearer bound (0 when `pos` is inside). */
+static double hyperrect_dist_sq(struct kdhyperrect *rect, const double *pos)
+{
+    double total = 0;
+    int axis;
+
+    for (axis = 0; axis < rect->dim; axis++)
+    {
+        const double lo = rect->min[axis];
+        const double hi = rect->max[axis];
+
+        /* below the minimum or above the maximum: add the squared gap; in between: no contribution */
+        if (pos[axis] < lo)
+            total += SQ(lo - pos[axis]);
+        else if (pos[axis] > hi)
+            total += SQ(hi - pos[axis]);
+    }
+    return total;
+}
+  
+  
+/*---- static helpers ---- */  
+#ifdef USE_LIST_NODE_ALLOCATOR
+/*special list node allocators: freed result nodes are kept on the free_nodes list and handed out again. */
+static struct res_node *free_nodes;
+
+#ifndef NO_PTHREADS
+static pthread_mutex_t alloc_mutex = PTHREAD_MUTEX_INITIALIZER;  /* guards free_nodes; identifiers restored from the corrupted "pthrea mutex_t" / "PTHREA MUTEX_INITIALIZER" (assumes <pthread.h> is included at the top of the file - confirm) */
+#endif
+
+/* Hand out a result node: a recycled one (with `next` cleared) when the free list is non-empty, otherwise a freshly malloc()ed one whose fields are uninitialised. Returns 0 if malloc() fails. */
+static struct res_node *alloc_resnode(void)
+{
+    struct res_node *node;
+
+#ifndef NO_PTHREADS
+    pthread_mutex_lock(&alloc_mutex);
+#endif
+
+    if(!free_nodes) {
+        node = (struct res_node*)malloc(sizeof *node);  /* explicit cast: this file is compiled as C++ (see the casts at the call sites) */
+    } else {
+        node = free_nodes;
+        free_nodes = free_nodes->next;
+        node->next = 0;
+    }
+
+#ifndef NO_PTHREADS
+    pthread_mutex_unlock(&alloc_mutex);
+#endif
+
+    return node;
+}
+
+/* Return a result node to the free list; the memory is kept for reuse, not released. */
+static void free_resnode(struct res_node *node)
+{
+#ifndef NO_PTHREADS
+    pthread_mutex_lock(&alloc_mutex);
+#endif
+
+    node->next = free_nodes;
+    free_nodes = node;
+
+#ifndef NO_PTHREADS
+    pthread_mutex_unlock(&alloc_mutex);
+#endif
+}
+#endif  /*list node allocator or not */
+  
+  
+/*inserts the item. if dist_sq is >= 0, then do an ordered insert */
+/*TODO make the ordering code use heapsort */
+/* list: head node of the result list; item: tree node to record; dist_sq: sort key, or a negative value to insert directly after the head */
+/* returns 0 on success, -1 when no result node could be allocated */
+static int rlist_insert(struct res_node *list, struct kdnode *item, double dist_sq)
+{
+    struct res_node *rnode;
+
+    /* allocate the node that will carry this result */
+    if(!(rnode = (res_node*)alloc_resnode()))
+    {
+        return -1;
+    }
+    rnode->item = item;           /* the tree node this result refers to */
+    rnode->dist_sq = dist_sq;     /* its key (squared distance, or the negative "unordered" marker) */
+
+    /* ordered insert: walk past every entry whose key is smaller, keeping the list ascending */
+    if(dist_sq >= 0.0)
+    {
+        while(list->next && list->next->dist_sq < dist_sq)
+        {
+            list = list->next;
+        }
+    }
+    rnode->next = list->next;
+    list->next = rnode;
+    return 0;
+}
+  
+/* Release every result node that follows the list head, leaving an empty list. */
+/* The head node itself (rset->rlist) is kept. */
+static void clear_results(struct kdres *rset)
+{
+    struct res_node *cur, *following;
+
+    /* the successor is fetched before free_resnode() runs, because `cur` must not be read afterwards */
+    for(cur = rset->rlist->next; cur; cur = following)
+    {
+        following = cur->next;
+        free_resnode(cur);
+    }
+
+    rset->rlist->next = 0;
+}
\ No newline at end of file
diff --git a/kdtree.h b/kdtree.h
new file mode 100644
index 0000000..01f3f51
--- /dev/null
+++ b/kdtree.h
@@ -0,0 +1,105 @@
+#ifndef _KDTREE_H_  
+#define _KDTREE_H_  
+  
+#ifdef __cplusplus  
+extern "C" {  
+#endif  
+  
+struct kdtree;  
+struct kdres;  
+
+
+
+  
+/*create a kd-tree for "k"-dimensional data */  
+struct kdtree *kd_create(int k);  
+  
+/*free the struct kdtree */  
+void kd_free(struct kdtree *tree);  
+  
+/*remove all the elements from the tree */  
+void kd_clear(struct kdtree *tree);  
+  
+/*if called with non-null 2nd argument, the function provided 
+ * will be called on data pointers (see kd_insert) when nodes 
+ * are to be removed from the tree. 
+ */  
+void kd_data_destructor(struct kdtree *tree, void (*destr)(void*));  
+  
+/*insert a node, specifying its position, and optional data */  
+int kd_insert(struct kdtree *tree, const double *pos, void *data);
+int kd_insertf(struct kdtree *tree, const float *pos, void *data);
+int kd_insert3(struct kdtree *tree, double x, double y, double z, void *data);
+int kd_insert3f(struct kdtree *tree, float x, float y, float z, void *data);
+  
+/*Find the nearest node from a given point. 
+ * 
+ * This function returns a pointer to a result set with at most one element. 
+ */  
+struct kdres *kd_nearest(struct kdtree *tree, const double *pos);
+struct kdres *kd_nearestf(struct kdtree *tree, const float *pos);
+struct kdres *kd_nearest3(struct kdtree *tree, double x, double y, double z);
+struct kdres *kd_nearest3f(struct kdtree *tree, float x, float y, float z);
+  
+/*Find the N nearest nodes from a given point. 
+ * 
+ * This function returns a pointer to a result set, with at most N elements, 
+ * which can be manipulated with the kd_res_* functions. 
+ * The returned pointer can be null as an indication of an error. Otherwise 
+ * a valid result set is always returned which may contain 0 or more elements. 
+ * The result set must be deallocated with kd_res_free after use. 
+ */  
+/*
+struct kdres *kd_nearest_n(struct kdtree *tree, const double *pos, int num); 
+struct kdres *kd_nearest_nf(struct kdtree *tree, const float *pos, int num); 
+struct kdres *kd_nearest_n3(struct kdtree *tree, double x, double y, double z); 
+struct kdres *kd_nearest_n3f(struct kdtree *tree, float x, float y, float z); 
+*/  
+  
+/*Find any nearest nodes from a given point within a range. 
+ * 
+ * This function returns a pointer to a result set, which can be manipulated 
+ * by the kd_res_* functions. 
+ * The returned pointer can be null as an indication of an error. Otherwise 
+ * a valid result set is always returned which may contain 0 or more elements. 
+ * The result set must be deallocated with kd_res_free after use. 
+ */  
+struct kdres *kd_nearest_range(struct kdtree *tree, const double *pos, double range);
+struct kdres *kd_nearest_rangef(struct kdtree *tree, const float *pos, float range);
+struct kdres *kd_nearest_range3(struct kdtree *tree, double x, double y, double z, double range);
+struct kdres *kd_nearest_range3f(struct kdtree *tree, float x, float y, float z, float range);
+  
+/*frees a result set returned by kd_nearest_range() */  
+void kd_res_free(struct kdres *set);
+  
+/*returns the size of the result set (in elements) */  
+int kd_res_size(struct kdres *set);
+  
+/*rewinds the result set iterator */  
+void kd_res_rewind(struct kdres *set);
+  
+/*returns non-zero if the set iterator reached the end after the last element */  
+int kd_res_end(struct kdres *set);
+  
+/*advances the result set iterator, returns non-zero on success, zero if 
+ * there are no more elements in the result set. 
+ */  
+int kd_res_next(struct kdres *set);
+  
+/*returns the data pointer (can be null) of the current result set item 
+ * and optionally sets its position to the pointers(s) if not null. 
+ */  
+void *kd_res_item(struct kdres *set, double *pos);
+void *kd_res_itemf(struct kdres *set, float *pos);
+void *kd_res_item3(struct kdres *set, double *x, double *y, double *z);
+void *kd_res_item3f(struct kdres *set, float *x, float *y, float *z);
+  
+/*equivalent to kd_res_item(set, 0) */  
+void *kd_res_item_data(struct kdres *set);
+  
+  
+#ifdef __cplusplus  
+}  
+#endif  
+  
+#endif  /*_KDTREE_H_ */  
diff --git a/main.cpp b/main.cpp
new file mode 100644
index 0000000..c7eb308
--- /dev/null
+++ b/main.cpp
@@ -0,0 +1,352 @@
+#include <iostream>
+#include <windows.h>
+#include <queue>
+#include <Kinect.h>
+#include <list>
+
+#include "scene.h"
+#include "SpringsBuilder.h"
+#include "Simulator.h"
+#include "parameter.h"
+#include "./bvh/BVHAccel.h"
+#include "generateBody.h"
+#include "detectBody.h"
+#include "KinectJointFilter.h"
+#include "Mesh.h"
+#include "VAOMesh.h"
+#include "Cloth.h"
+#include "ObjLoader.h"
+
+// #define VLD_FORCE_ENABLE
+// #include<vld.h>
+
+using namespace std;
+
+extern int StartNum = 100;	// GetBodyData() loads frames StartNum + 1 .. StopNum
+extern int StopNum = 300;	// last frame index loaded by GetBodyData()
+int NameofBody = StartNum;
+
+string clothfile = "../smooth/2/";	// directory prefix; GetBodyData() appends "<frame>.obj"
+
+queue<Mesh> BodyQueue;	// meshes pushed by GetBodyData()
+queue<Vec4s> VQueue;	// per-frame body vertices pushed by DtoG()
+queue<Vec3s> NQueue;	// per-frame body normals pushed by DtoG(), in step with VQueue
+
+HANDLE hMutex = NULL;	// created in main() after the worker thread has been started
+
+list<mat> smoothList;	// the most recent joint matrices (at most 5) averaged by DtoG()
+list<mat>::iterator iter;	// cursor DtoG() uses to walk smoothList
+
+DoudouHead_Merge DoudouHead_Solver;	// initialised from the head mesh in main()
+
+//Thread entry point: loads "<clothfile><i>.obj" for i = StartNum + 1 .. StopNum, scales and translates each mesh, and pushes it onto BodyQueue. pParam is unused; always returns 0.
+DWORD WINAPI GetBodyData(LPVOID pParam)
+{
+	for (int i = StartNum + 1; i <= StopNum; i++)
+	{
+		string file_name;
+		char num[12];	// sign + 10 digits + NUL covers any 32-bit int; the former 4-byte buffer made _itoa_s fail for every index >= 1000
+		_itoa_s(i, num, 10);
+		string s = num;
+		file_name = clothfile + s + ".obj";
+
+		ObjLoader loader;
+
+		Mesh new_body;
+		loader.load(new_body, file_name);	// NOTE(review): the outcome of load() is not checked here
+		new_body.scale(0.30f);
+		new_body.translate(0.0f, 1.0f, 0.0f);
+		BodyQueue.push(new_body);	// NOTE(review): pushed without taking hMutex - confirm how the consumer synchronises
+	}
+
+	return 0;
+}
+
+//Thread entry point: reads body frames from the Kinect sensor, converts the first tracked body's joints to a pose, and pushes generated body vertices/normals onto VQueue/NQueue. Returns -1 if the sensor, body source or reader cannot be opened.
+DWORD WINAPI DtoG(LPVOID pParam)
+{
+	//Initiate Template
+	SMPL bodyTemplate = SMPL(MALE);
+	cout << "SMPL::initial finished!" << endl;
+
+	//Initiate Sensor (pSensor starts out null so a failed lookup can never leave a garbage pointer behind)
+	IKinectSensor *pSensor = nullptr;
+	HRESULT hResult = GetDefaultKinectSensor(&pSensor);
+	// Open() only when the lookup succeeded; previously its HRESULT was overwritten unchecked and an uninitialised pSensor was dereferenced
+	if (SUCCEEDED(hResult)) hResult = pSensor->Open();
+	if (FAILED(hResult)) {
+		std::cerr << "Error : IKinectSensor::Open()" << std::endl;
+		return -1;
+	}
+
+	IBodyFrameSource *pBodySource;
+	hResult = pSensor->get_BodyFrameSource(&pBodySource);
+	if (FAILED(hResult)) {
+		std::cerr << "Error : IKinectSensor::get_BodyFrameSource()" << std::endl;
+		return -1;	// NOTE(review): the error returns in this function do not release the interfaces acquired so far
+	}
+
+	IBodyFrameReader *pBodyReader;
+	hResult = pBodySource->OpenReader(&pBodyReader);
+	if (FAILED(hResult)) {
+		std::cerr << "Error : IBodyFrameSource::OpenReader()" << std::endl;
+		return -1;
+	}
+
+	//mat pp = zeros(24, 3);
+	//mat result=bodyTemplate.gen_pose_model(pp, TRUE);
+	//bodyTemplate.write_to_obj(result, "MALE.obj");
+
+	// Holt Double Exponential Smoothing Filter
+	Sample::FilterDoubleExponential filter[BODY_COUNT];
+
+	// Option : Setting Smoothing Parameter
+	for (int count = 0; count < BODY_COUNT; count++) {
+		float smoothing = 0.5f;          // [0..1], lower values closer to raw data
+		float correction = 0.5f;         // [0..1], lower values slower to correct towards the raw data
+		float prediction = 0.5f;         // [0..n], the number of frames to predict into the future
+		float jitterRadius = 0.05f;       // The radius in meters for jitter reduction
+		float maxDeviationRadius = 0.04f; // The maximum radius in meters that filtered positions are allowed to deviate from raw data
+
+		filter[count].Init(smoothing, correction, prediction, jitterRadius, maxDeviationRadius);
+	}
+
+	//The label number of the first body detected by Kinect
+	int BODY_LABEL = -1;
+
+	StopWatch time;
+	time.start();
+	int counter = 1;	// never modified below, so the loop runs until the process exits and the cleanup after it is not reached
+	bool tag = TRUE;	// toggled per generated pose so that only every other one is queued
+	bool first = TRUE;	// true until the T-pose -> first-pose transition frames have been queued
+	while (counter)
+	{
+		Vec4s vertex;
+		Vec3s normal;
+		//Obj new_body = BodyQueue.front();
+		mat trans_joint;
+
+		//bool judge = detectJoint(hResult, pBodyReader, joint);
+
+		IBodyFrame *pBodyFrame = nullptr;
+		hResult = pBodyReader->AcquireLatestFrame(&pBodyFrame);
+		if (SUCCEEDED(hResult)) {
+			IBody *pBody[BODY_COUNT] = { 0 };
+
+			hResult = pBodyFrame->GetAndRefreshBodyData(BODY_COUNT, pBody);
+			if (SUCCEEDED(hResult)) {
+				for (int count = 0; count < BODY_COUNT; count++) {
+					BOOLEAN bTracked = false;
+					hResult = pBody[count]->get_IsTracked(&bTracked);
+					if (bTracked&&SUCCEEDED(hResult) && BODY_LABEL == -1)
+						BODY_LABEL = count;	// lock onto the first tracked body; every other slot is ignored from now on
+					if (SUCCEEDED(hResult) && bTracked && count == BODY_LABEL) {
+						//counter--;
+						Joint joint[JointType::JointType_Count];
+						/////////////////////////////
+						hResult = pBody[count]->GetJoints(JointType::JointType_Count, joint);//joint
+
+						//////////////////////// Filtered Joint//////////////////////////////////
+						filter[count].Update(joint);
+						const DirectX::XMVECTOR *vec = filter[count].GetFilteredJoints();
+						for (int type = 0; type < JointType::JointType_Count; type++) {
+							if (joint[type].TrackingState != TrackingState::TrackingState_NotTracked) {
+								float x = 0.0f, y = 0.0f, z = 0.0f;	// NOTE(review): the filtered coordinates are read but never used; the raw `joint` array is what gets transformed below
+								DirectX::XMVectorGetXPtr(&x, vec[type]);
+								DirectX::XMVectorGetYPtr(&y, vec[type]);
+								DirectX::XMVectorGetZPtr(&z, vec[type]);
+							}
+						}
+						////////////////////////////////////////////////////////////////////////
+						//Get joint for genBody from kinect joint 
+						trans_joint = JointTransform(joint);
+						////////////////Transition from T-pose to first frame///////////////////////////////////
+						if (first == TRUE) {
+							mat pose = bodyTemplate.J_to_pose(trans_joint);
+							float coefficient = 0.04f / max(max(pose));
+							cout << coefficient << endl;
+							mat transition = zeros(24, 3);
+							int num = 0;
+							while (max(max(abs(transition))) < max(max(abs(pose))))
+							{
+								//transition.print("t:");
+								genFirstBody(transition, vertex, normal, bodyTemplate);
+								transition += pose*coefficient;
+								VQueue.push(vertex);
+								NQueue.push(normal);
+								num++;
+							}
+							cout << num << endl;
+							first = FALSE;
+						}
+						//////////////////////////////////////////////////////////////////////////////////
+						/////////////////////////////Smooth by List////////////////////////////////////////
+						mat sum = zeros(24, 3);
+						if (smoothList.size() < 5)
+							smoothList.push_back(trans_joint);
+						else {
+							// average the 5 stored frames, then replace the oldest one with the current frame
+							for (iter = smoothList.begin(); iter != smoothList.end(); ++iter)
+							{
+								sum += (*iter);
+							}
+							sum = sum / 5;
+							smoothList.pop_front();
+							smoothList.push_back(trans_joint);
+
+							///////////////////////////////////////////////////////////////////////////
+
+							genBodyVector(sum, vertex, normal, bodyTemplate);
+
+							cout << "A new pose has been detected!" << endl;
+
+							if (tag == TRUE) {
+								VQueue.push(vertex);
+								NQueue.push(normal);
+								tag = FALSE;
+								cout << "num:" << VQueue.size() << endl;
+							}
+							else tag = TRUE;
+							time.stop();
+							cout << "cost:" << time.elapsed_ms() << endl;
+							time.restart();
+						}
+						//return TRUE;
+					}
+				}
+			}
+			for (int count = 0; count < BODY_COUNT; count++) {
+				SafeRelease(pBody[count]);
+			}
+		}
+		SafeRelease(pBodyFrame);
+
+		//if (judge)
+		//{
+		//	genBody(joint, new_body);
+		//	cout << "A new pose has been detected!" << endl;
+		//}
+		//else continue;
+
+		//new_body.scale_translate(0.30, 0, 1.0, 0);
+		//new_body.unified();
+		//BodyQueue.push(new_body);
+	}
+
+	SafeRelease(pBodySource);
+	SafeRelease(pBodyReader);
+	if (pSensor) {
+		pSensor->Close();
+	}
+	SafeRelease(pSensor);
+
+	return 0;
+}
+
+int main(int argc, char* *argv)
+{
+	Scene::instance().initialize(argc, argv); //initialize opengl 
+
+	ObjLoader loader;
+	Cloth cloth(SINGLE_LAYER_NOB);
+
+	//// Garment selection: the active garment is the tshirt2 load further down; the commented-out blocks are alternative presets
+	//Obj cloth("../cloth/cloth.obj");    //pose0
+	//cloth.scale_translate(0.31, 0, 1.95, 0.02);
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/dress2/dress2-iso.obj",SINGLE_LAYER_NOB);  
+	//cloth.rotation(90, X);   //
+	//cloth.scale_translate(0.24, 0, 1.2, 0.02); 
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/dress3/dress3.obj",SINGLE_LAYER_NOB);  
+	//cloth.rotation(90, X);   
+	//cloth.scale_translate(0.24, 0, 0.45, 0.02); 
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/dress-asymmetric/dress-asymmetric.obj", SINGLE_LAYER_NOB);
+	//cloth.rotation(90, X);   //
+	//cloth.scale_translate(0.25, 0, 1.10, 0.02);
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/dress-victor/dress-victor.obj", SINGLE_LAYER_NOB);
+	//cloth.rotation(90, X);   
+	//cloth.scale_translate(0.25, 0, 1.60, 0.02);
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/robe/robe.obj", SINGLE_LAYER_NOB);
+	//cloth.rotation(90, X);   //
+	//cloth.scale_translate(0.3, 0, 0.5, 0.0);
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/tshirt/tshirt.obj", SINGLE_LAYER_NOB);
+	//cloth.rotation(90, X);   
+	//cloth.rotation(-5, Z);
+	//cloth.scale_translate(0.26, 0, 1.18, -0.1);
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/shirt/shirt.obj", SINGLE_LAYER_NOB);
+	//cloth.rotation(90, X);   
+	//cloth.rotation(-4, Z);
+	//cloth.scale_translate(0.27, 0, 2.1, 0.15);
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/skirt/skirt.obj", SINGLE_LAYER_NOB);
+	//cloth.rotation(90, X);   
+	//cloth.scale_translate(0.29, 0, 0.5, 0);
+	//cloth.unified();
+
+	loader.load(cloth, "../cloth_no_boundary/tshirt2/tshirt2.obj");
+	cloth.rotation(90, 0, 0);
+	cloth.rotation(0, 0, -4);
+	cloth.scale(0.28f);
+	cloth.translate(0, 0.9f, -2.2f);
+
+
+	//Obj cloth("../cloth_no_boundary/shorts/shorts.obj", SINGLE_LAYER_NOB);
+	//cloth.rotation(90, X);   //
+	//cloth.scale_translate(0.29, 0, 0.5, 0);
+	//cloth.unified();
+
+	//Obj cloth("../cloth_no_boundary/vest/vest.obj", SINGLE_LAYER_NOB);
+	//cloth.rotation(90, X);   
+	//cloth.scale_translate(0.28, 0, 1.4, 0.02); 
+	//cloth.unified();
+
+
+	//string file;
+	//char num_s[4];
+	//_itoa_s(StartNum, num_s, 10);
+	//string ss = num_s;
+	//file = clothfile + ss + ".obj";
+	//Obj body(file);
+
+	VAOMesh body, head;
+
+	loader.load(body, "../Template/MALE.obj");
+	body.scale(0.3f);
+	body.translate(0.0f, 0.6f, 0.0f);
+
+	loader.load(head, "../DoudouHead/HeadColored.obj");
+	DoudouHead_Solver.Init(head);
+
+	// register the three meshes with the scene, then hand the body and cloth to the scene's simulation hooks
+	Scene::instance().add(cloth);
+	Scene::instance().add(body);
+	Scene::instance().add(head);
+
+	Scene::instance().initiate_body_template(body);
+	Scene::instance().update_simulating_cloth(cloth);
+	Scene::instance().update_simulating_body(body);
+	// start the Kinect worker (DtoG); its handle is closed at once because the thread is never joined
+	HANDLE hThread1 = CreateThread(NULL, 0, DtoG, NULL, 0, NULL);
+	CloseHandle(hThread1);
+	hMutex = CreateMutex(NULL, FALSE, NULL);	// NOTE(review): created after the worker has started - confirm nothing on that thread uses hMutex earlier
+
+	Scene::instance().render();
+
+	return 0;
+}
+
+
diff --git a/parameter.cpp b/parameter.cpp
new file mode 100644
index 0000000..c0eab1e
--- /dev/null
+++ b/parameter.cpp
@@ -0,0 +1,35 @@
+
+#include "parameter.h"
+
+Parameter sim_parameter(    // global simulation settings. NOTE(review): the original comment here is mis-encoded; it mentions the GPU side and verlet.cu - presumably these values must be kept in sync with that file by hand; confirm
+	20,                    //NUM_PER_VERTEX_ADJ_FACES
+	20,                    //NUM_PER_VERTEX_SPRING_STRUCT
+	20,                    //NUM_PER_VERTEX_SPRING_BEND
+	-0.05f,              //damp
+	0.3f,                  //mass
+	1.0f / 30.0f,           //dt
+	30.0f,                  //spring_structure
+	1.0f                  //spring_bend
+);
+
+Parameter::Parameter(	// stores each argument in the member of the same name (without the leading underscore); no validation is performed
+	unsigned int _NUM_PER_VERTEX_ADJ_FACES,
+	unsigned int _NUM_PER_VERTEX_SPRING_STRUCT,
+	unsigned int _NUM_PER_VERTEX_SPRING_BEND,
+	float _damp,
+	float _mass,
+	float _dt,
+	float _spring_structure,
+	float _spring_bend) : 
+	NUM_PER_VERTEX_ADJ_FACES(_NUM_PER_VERTEX_ADJ_FACES),
+	NUM_PER_VERTEX_SPRING_STRUCT(_NUM_PER_VERTEX_SPRING_STRUCT),
+	NUM_PER_VERTEX_SPRING_BEND(_NUM_PER_VERTEX_SPRING_BEND),
+	damp(_damp),
+	mass(_mass),
+	dt(_dt),
+	spring_structure(_spring_structure),
+	spring_bend(_spring_bend)
+{
+}
+
+
diff --git a/parameter.h b/parameter.h
new file mode 100644
index 0000000..a4e577b
--- /dev/null
+++ b/parameter.h
@@ -0,0 +1,31 @@
+#pragma once
+
+
+// Value bundle holding the tuning constants of the simulation.
+// All members are public and are filled in once by the constructor.
+class Parameter
+{
+public:
+	// Copies each argument into the corresponding member below
+	// (first argument -> NUM_PER_VERTEX_ADJ_FACES, and so on in order).
+	Parameter(
+		unsigned int _NUM_ADJFACE,
+		unsigned int _NUM_PER_VERTEX_SPRING_STRUCT,
+		unsigned int _NUM_PER_VERTEX_SPRING_BEND,
+		float _damp,
+		float _mass,
+		float _dt,
+		float _spring_structure,
+		float _spring_bend
+	);
+
+	// Per-vertex capacities — presumably the maximum number of adjacent faces /
+	// structural springs / bend springs stored per vertex; TODO confirm with the solver.
+	unsigned int NUM_PER_VERTEX_ADJ_FACES;
+	unsigned int NUM_PER_VERTEX_SPRING_STRUCT;
+	unsigned int NUM_PER_VERTEX_SPRING_BEND;
+
+	// Scalar coefficients; their exact use is defined by the solver code.
+	float damp;
+	float mass;
+	float dt;
+	float spring_structure;
+	float spring_bend;
+};
+
+extern Parameter sim_parameter;
+
+
diff --git a/refactor.xlsx b/refactor.xlsx
new file mode 100644
index 0000000..2e1d0c5
Binary files /dev/null and b/refactor.xlsx differ
diff --git a/sRTSolver_MergeHead.cpp b/sRTSolver_MergeHead.cpp
new file mode 100644
index 0000000..62d8382
--- /dev/null
+++ b/sRTSolver_MergeHead.cpp
@@ -0,0 +1,59 @@
+
+#include "scene.h"
+#include "Mesh.h"
+#include "ObjLoader.h"
+
+// Storage for DoudouHead_Merge's static members.
+// Enabled: when true, the next Scene::RenderBuffer call uses `model` below instead of
+//          Scene::model and then clears the flag.
+// model:   4x4 matrix written by Calc() in column-major order.
+bool DoudouHead_Merge::Enabled (false);
+GLfloat DoudouHead_Merge::model[16];
+// Creates an empty solver; Init() must be called before Calc() produces anything useful.
+// Len is zeroed so that a Calc() issued before Init() does not loop over an
+// indeterminate row count.
+DoudouHead_Merge::DoudouHead_Merge() : Len(0)
+{
+
+}
+// Loads the two index lists from disk and caches the head-mesh points they select.
+//   _Pts_Head_obj : head mesh whose `vertices` are sampled at the indices read from
+//                   Neck_IdxMerge.txt.
+// After the call Pts_Head is a Len x 4 matrix: x, y, z of each selected head vertex
+// followed by a column of ones. Len is the number of entries in Neck_IdxBody.txt.
+void DoudouHead_Merge::Init(Mesh& _Pts_Head_obj)
+{
+	Pts_Body_Cor.load("../DoudouHead/Neck_IdxBody.txt");
+	Pts_Head_Cor.load("../DoudouHead/Neck_IdxMerge.txt");
+
+	Len = Pts_Body_Cor.n_elem;
+	Ones = ones<mat>(Len, 1);
+
+	mat head_xyz = zeros<mat>(Len, 3);
+	for (uword row = 0; row < Len; ++row)
+	{
+		// The index files are stored as floating point; convert once per row.
+		const size_t vertex_id = size_t(Pts_Head_Cor(row));
+		head_xyz(row, 0) = _Pts_Head_obj.vertices[vertex_id].x;
+		head_xyz(row, 1) = _Pts_Head_obj.vertices[vertex_id].y;
+		head_xyz(row, 2) = _Pts_Head_obj.vertices[vertex_id].z;
+	}
+
+	// Append the homogeneous coordinate column.
+	Pts_Head = join_horiz(head_xyz, Ones);
+}
+// Convenience constructor: builds the solver and immediately runs Init() on the
+// given head mesh.
+DoudouHead_Merge::DoudouHead_Merge(Mesh& _Pts_Head_obj)
+{
+	Init(_Pts_Head_obj);
+}
+// Fits a scaled rotation + translation that maps the cached head points onto the
+// corresponding body points and stores it in the static `model` matrix.
+//   _Pts_Body_vec : body vertex positions, sampled at the indices in Pts_Body_Cor.
+// Returns a pointer to the static 16-float `model` array (column-major 4x4).
+// Side effect: sets the static Enabled flag to true.
+GLfloat *DoudouHead_Merge::Calc(Vec4s& _Pts_Body_vec)
+{
+	Enabled = true;
+	// Gather the x, y, z of the selected body vertices into a Len x 3 matrix.
+	Pts_Body = zeros<mat>(Len, 3);
+	for (int i = 0; i < Len; i++)
+	{
+		Pts_Body(i, 0) = _Pts_Body_vec[size_t(Pts_Body_Cor(i))].x;
+		Pts_Body(i, 1) = _Pts_Body_vec[size_t(Pts_Body_Cor(i))].y;
+		Pts_Body(i, 2) = _Pts_Body_vec[size_t(Pts_Body_Cor(i))].z;
+	}
+	// Solve Pts_Head * X = [Pts_Body | 1] for X and transpose the result, giving a
+	// 4x4 transform estimate from homogeneous head points to homogeneous body points.
+	mat solved = solve(Pts_Head, join_horiz(Pts_Body, Ones)).t();
+
+	// Decompose the upper-left 3x3 part; U * V^T is used as the rotation and the
+	// mean singular value as a uniform scale.
+	mat U, V;
+	vec s;
+	svd(U, s, V, solved.submat(0,0,2,2));
+	// NOTE(review): the 1.08 factor enlarges the fitted scale by 8%; the reason is
+	// not visible here — confirm with the author. The value is narrowed to float.
+	float s_value = mean(s) * 1.08;
+	mat sR = s_value*U*V.t();
+	// Translation: column-wise mean of the residual between the body points and the
+	// scaled/rotated head points (3x1 after the transpose).
+	mat T = mean(Pts_Body - Pts_Head.cols(0, 2) * sR.t(), 0).t();
+
+	// Assemble the homogeneous matrix [sR T; 0 0 0 1].
+	mat H = join_vert(join_horiz(sR, T), join_horiz(zeros(1, 3), ones(1, 1)));
+
+
+	//cout << solved << endl;
+	// Store H column by column: element (row i, column j) goes to model[i + 4*j].
+	for (int i = 0; i < 4; ++i)
+		for (int j = 0; j < 4; ++j)
+			model[i+4*j] = H(i, j);
+
+	return model;
+}
\ No newline at end of file
diff --git a/scene.cpp b/scene.cpp
new file mode 100644
index 0000000..318b9fc
--- /dev/null
+++ b/scene.cpp
@@ -0,0 +1,501 @@
+#include <queue>
+#include <iostream>
+
+#include <GL/glew.h>
+#include <GL/freeglut.h>
+#include <FreeImage.h>
+
+#include <cuda_gl_interop.h>
+
+#define GLEW_STATIC
+
+#include "wglew.h"
+#include "scene.h"
+#include "VAOMesh.h"
+#include "ObjLoader.h"
+
+using namespace std;
+
+// Definitions of Scene's static view / interaction state (declared in scene.h).
+
+// Last mouse position recorded by the mouse callbacks.
+int Scene::oldX = 0, Scene::oldY = 0;
+// View rotation angles passed to glRotatef in onRender.
+float Scene::rX = 15, Scene::rY = 0;
+// Mouse mode: 0 = zoom (middle button), 1 = rotate; set in OnMouseDown.
+int Scene::state = 1;
+// View translation along z (dist) and y (dy) applied in onRender.
+float Scene::dist = -15;
+float Scene::dy = 0;
+GLint Scene::viewport[4];
+GLfloat Scene::model[16];
+GLfloat Scene::view[16];
+GLfloat Scene::projection[16];
+glm::vec3 Scene::Up = glm::vec3(0, 1, 0),
+Scene::Right = glm::vec3(0, 0, 0),
+Scene::viewDir = glm::vec3(0, 0, 0);
+// Index of the vertex being dragged; -1 means none.
+int Scene::selected_index = -1;
+// Window size as last reported to OnReshape; used by screenshot().
+static int current_width;
+static int current_height;
+
+extern int StartNum;
+extern int StopNum;
+// Frame counter and threshold used by RenderGPU_CUDA to decide when to pull the
+// next body frame from the queues.
+int Iter = 0;
+int MaxIteration = 5000;
+int pose = 0;
+// NOTE(review): initialised from globals defined in another translation unit; this is
+// only well-defined if StartNum/StopNum are constant-initialised — confirm.
+int poser = StopNum - StartNum;
+
+// Running index appended to screenshot file names.
+static int num_screenshot = 0;
+// Cycled by the 'm' key in OnKey.
+GLenum GL_MODE = GL_LINE_LOOP;
+
+// Queues defined elsewhere; update_body_data() consumes VQueue and NQueue.
+extern queue<Mesh> BodyQueue;
+extern queue<Vec4s> VQueue;
+extern queue<Vec3s> NQueue;
+
+extern HANDLE hMutex;
+
+
+extern DoudouHead_Merge DoudouHead_Solver;
+
+// Returns the one Scene object of the program; it is constructed the first time
+// this function is called.
+Scene &Scene::instance()
+{
+	static Scene the_scene;
+	return the_scene;
+}
+
+// Creates the GLUT window and initialises GLEW.
+//   argc, argv : forwarded to glutInit.
+// If GLEW fails to initialise, its error string is printed to stderr and the
+// remaining setup (swap interval, close behaviour) is skipped.
+void Scene::initialize(int argc, char **argv)
+{
+	glutInit(&argc, argv);
+	glutInitDisplayMode(GLUT_DOUBLE | GLUT_RGBA | GLUT_DEPTH);
+	glutInitWindowSize(width, height);
+	glutCreateWindow("ViSG_RealTime_DCSim");
+
+	const GLenum glew_status = glewInit();
+	if (glew_status == GLEW_OK)
+	{
+		// Turn vertical synchronisation off.
+		wglSwapIntervalEXT(0);
+		// Let glutMainLoop() return when the window is closed.
+		glutSetOption(GLUT_ACTION_ON_WINDOW_CLOSE, GLUT_ACTION_GLUTMAINLOOP_RETURNS);
+	}
+	else
+	{
+		fprintf(stderr, "%s\n", glewGetErrorString(glew_status));
+	}
+}
+
+// Loads the shader program, registers the GLUT callbacks and enters the GLUT main
+// loop. Does not return until the main loop ends.
+void Scene::render()
+{
+	loadShader(); //InitGL(); //load shader
+
+	// Drawing / window callbacks.
+	glutDisplayFunc(onRender);
+	glutReshapeFunc(OnReshape);
+	glutIdleFunc(OnIdle);
+
+	// Input and shutdown callbacks.
+	glutMouseFunc(OnMouseDown);
+	glutMotionFunc(OnMouseMove);
+	glutKeyboardFunc(OnKey);
+	glutCloseFunc(OnShutdown);
+
+	glutMainLoop();
+}
+
+// Draws one uploaded mesh with the render shader.
+//   vao_buffer : VAO, index buffer, texture and index count of the mesh to draw.
+// The model matrix is Scene::model unless DoudouHead_Merge::Enabled is set, in which
+// case DoudouHead_Merge::model is used for this draw and the flag is cleared.
+void Scene::RenderBuffer(VAO_Buffer vao_buffer)
+{
+	GLfloat view_direction[3] = { viewDir.x, viewDir.y, viewDir.z };
+
+	renderShader.Use();
+	glUniformMatrix4fv(renderShader("view"), 1, GL_FALSE, view);
+
+	// Select the model matrix; the head override applies to a single draw only.
+	const GLfloat *model_matrix = model;
+	if (DoudouHead_Merge::Enabled)
+	{
+		model_matrix = DoudouHead_Merge::model;
+		DoudouHead_Merge::Enabled = false;
+	}
+	// Float upload is used because the platform does not support glUniformMatrix4dv.
+	glUniformMatrix4fv(renderShader("model"), 1, GL_FALSE, model_matrix);
+
+	glUniformMatrix4fv(renderShader("projection"), 1, GL_FALSE, projection);
+	glUniform3fv(renderShader("viewPos"), 1, view_direction);
+
+	// Bind, draw indexed triangles, then restore the default bindings.
+	glBindVertexArray(vao_buffer.vao);
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, vao_buffer.index_buffer);
+	glBindTexture(GL_TEXTURE_2D, vao_buffer.texture);
+	glDrawElements(GL_TRIANGLES, (GLsizei)vao_buffer.index_size, GL_UNSIGNED_INT, 0);
+	glBindTexture(GL_TEXTURE_2D, 0);
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
+	glBindVertexArray(0);
+
+	renderShader.UnUse();
+}
+
+// Uploads a mesh to the GPU and appends it to the list of drawable objects.
+//   mesh : source geometry; on return mesh.vbo holds the handles created here.
+// One array buffer stores three consecutive sections:
+//   [ positions (vec4) | texture coordinates (vec2) | normals (vec3) ]
+// and a separate element buffer stores the face indices (3 per face).
+void Scene::add(VAOMesh& mesh)
+{
+	// Section sizes and offsets inside the shared array buffer.
+	const size_t position_bytes = sizeof(glm::vec4) * mesh.vertices.size();
+	const size_t texcoord_bytes = sizeof(glm::vec2) * mesh.texures.size();
+	const size_t normal_bytes = sizeof(glm::vec3) * mesh.normals.size();
+	const size_t texcoord_offset = position_bytes;
+	const size_t normal_offset = position_bytes + texcoord_bytes;
+
+	VAO_Buffer handles;
+	glGenVertexArrays(1, &handles.vao);
+	glGenBuffers(1, &handles.array_buffer);
+	glGenBuffers(1, &handles.index_buffer);
+	handles.texture = mesh.gl_texture;
+	handles.index_size = mesh.faces.size() * 3;
+	check_GL_error();
+
+	glBindVertexArray(handles.vao);
+	glBindBuffer(GL_ARRAY_BUFFER, handles.array_buffer);
+
+	// Allocate the whole buffer, then fill it section by section.
+	glBufferData(GL_ARRAY_BUFFER, position_bytes + texcoord_bytes + normal_bytes, NULL, GL_STATIC_DRAW);
+	glBufferSubData(GL_ARRAY_BUFFER, 0, position_bytes, &mesh.vertices[0]);
+	glBufferSubData(GL_ARRAY_BUFFER, texcoord_offset, texcoord_bytes, &mesh.texures[0]);
+	glBufferSubData(GL_ARRAY_BUFFER, normal_offset, normal_bytes, &mesh.normals[0]);
+	check_GL_error();
+
+	// Attribute layout matches the three sections above.
+	glVertexAttribPointer(position, 4, GL_FLOAT, GL_FALSE, sizeof(glm::vec4), 0);
+	glVertexAttribPointer(texture, 2, GL_FLOAT, GL_FALSE, sizeof(glm::vec2), reinterpret_cast<const GLvoid*>(texcoord_offset));
+	glVertexAttribPointer(normal, 3, GL_FLOAT, GL_FALSE, sizeof(glm::vec3), reinterpret_cast<const GLvoid*>(normal_offset));
+
+	glEnableVertexAttribArray(position);
+	glEnableVertexAttribArray(texture);
+	glEnableVertexAttribArray(normal);
+	glBindBuffer(GL_ARRAY_BUFFER, 0);
+
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, handles.index_buffer);
+	glBufferData(GL_ELEMENT_ARRAY_BUFFER, sizeof(GLuint) * mesh.faces.size() * 3, &mesh.faces[0], GL_STATIC_DRAW);
+	glBindBuffer(GL_ELEMENT_ARRAY_BUFFER, 0);
+
+	glBindVertexArray(0);
+
+	// Register with the scene and hand the handles back to the mesh.
+	obj_vaos.push_back(handles);
+	mesh.vbo = handles;
+}
+
+// Forwards the cloth mesh to the simulator.
+void Scene::update_simulating_cloth(Cloth &cloth)
+{
+	simulator.update_cloth(cloth);
+}
+
+// Forwards the body mesh to the simulator.
+void Scene::update_simulating_body(Mesh &body)
+{
+	simulator.update_body(body);
+}
+
+void Scene::check_GL_error()
+{
+	assert(glGetError() == GL_NO_ERROR);
+}
+
+// Compiles and links the render shader, registers the uniforms used by
+// RenderBuffer, uploads the constant lighting uniforms and enables depth testing.
+void Scene::loadShader()
+{
+	//set light
+	GLfloat lightPos[3] = { 0, 0.0f, 10.0f };
+	GLfloat lightColor[3] = { 0.8f, 0.8f, 0.8f };
+	GLfloat objectColor[3] = { 0.8f, 0.8f, 0.8f };
+
+	renderShader.LoadFromFile(GL_VERTEX_SHADER, "shaders/render.vert");
+	renderShader.LoadFromFile(GL_FRAGMENT_SHADER, "shaders/render.frag");
+	renderShader.CreateAndLinkProgram();
+
+	// Uniforms are registered while the program is in use; the three lighting
+	// values are uploaded once here, the matrices and viewPos on every draw.
+	renderShader.Use();
+	renderShader.AddUniform("color");
+	renderShader.AddUniform("model");
+	renderShader.AddUniform("view");
+	renderShader.AddUniform("projection");
+	renderShader.AddUniform("lightPos");
+	glUniform3fv(renderShader("lightPos"), 1, lightPos);
+	renderShader.AddUniform("viewPos");
+	renderShader.AddUniform("lightColor");
+	glUniform3fv(renderShader("lightColor"), 1, lightColor);
+	renderShader.AddUniform("objectColor");
+	glUniform3fv(renderShader("objectColor"), 1, objectColor);
+	renderShader.UnUse();
+
+	check_GL_error();
+	glEnable(GL_DEPTH_TEST);
+}
+
+// Saves the current framebuffer as ../screenshot/screenshot<N>.bmp, where N is a
+// counter that increases with every attempt to save.
+// Nothing is written if the window size is not known yet or FreeImage cannot wrap
+// the pixel data; a failed save is reported on stderr instead of being announced
+// as a success.
+void Scene::screenshot()
+{
+	if (current_width <= 0 || current_height <= 0)
+	{
+		cerr << "screenshot skipped: window size is not known yet" << endl;
+		return;
+	}
+
+	// Three bytes per pixel (BGR), rows tightly packed.
+	const int row_bytes = 3 * current_width;
+	vector<BYTE> pixels(static_cast<size_t>(row_bytes) * static_cast<size_t>(current_height));
+
+	// glReadPixels pads every row to GL_PACK_ALIGNMENT bytes (4 by default). Force
+	// tight packing so the data matches row_bytes and cannot overrun the buffer,
+	// then restore the previous setting.
+	GLint previous_pack_alignment = 4;
+	glGetIntegerv(GL_PACK_ALIGNMENT, &previous_pack_alignment);
+	glPixelStorei(GL_PACK_ALIGNMENT, 1);
+	glReadPixels(0, 0, current_width, current_height, GL_BGR, GL_UNSIGNED_BYTE, pixels.data());
+	glPixelStorei(GL_PACK_ALIGNMENT, previous_pack_alignment);
+
+	// Convert to FreeImage format & save to file
+	FIBITMAP *image = FreeImage_ConvertFromRawBits(pixels.data(), current_width, current_height, row_bytes, 24, 0x0000FF, 0xFF0000, 0x00FF00, false);
+	if (image == NULL)
+	{
+		cerr << "screenshot failed: could not convert the pixel data" << endl;
+		return;
+	}
+
+	string str = "../screenshot/screenshot";
+	str += to_string(num_screenshot++);
+	str += ".bmp";
+
+	const bool saved = FreeImage_Save(FIF_BMP, image, str.c_str(), 0) != 0;
+
+	// Free resources (the pixel vector releases itself).
+	FreeImage_Unload(image);
+
+	if (saved)
+		cout << str << " saved successfully!" << endl;
+	else
+		cerr << "failed to save " << str << endl;
+}
+
+// OpenGL helper: draws a grey line grid on the plane y = -2, spanning -10..10 in
+// x and z with one line per unit in each direction (immediate mode).
+void Scene::DrawGrid()
+{
+	const int half_extent = 10;
+	const float plane_y = -2.0f;
+	const float near_edge = static_cast<float>(-half_extent);
+	const float far_edge = static_cast<float>(half_extent);
+
+	glBegin(GL_LINES);
+	glColor3f(0.5f, 0.5f, 0.5f);
+	int step = -half_extent;
+	while (step <= half_extent)
+	{
+		const float offset = static_cast<float>(step);
+
+		// Line parallel to the z axis at x = offset.
+		glVertex3f(offset, plane_y, near_edge);
+		glVertex3f(offset, plane_y, far_edge);
+
+		// Line parallel to the x axis at z = offset.
+		glVertex3f(near_edge, plane_y, offset);
+		glVertex3f(far_edge, plane_y, offset);
+
+		++step;
+	}
+	glEnd();
+}
+
+
+// Per-frame work: optionally pulls the next body frame, advances the simulation and
+// draws every registered object.
+// The last entry of obj_vaos is drawn with the head transform
+// (DoudouHead_Merge::model); all earlier entries use Scene::model.
+// Drawing is skipped when no object has been added, instead of indexing an empty
+// list.
+void Scene::RenderGPU_CUDA()
+{
+	// Once Iter exceeds MaxIteration, consume a queued body frame if more than one
+	// is waiting; afterwards a new frame is taken every 25 iterations.
+	// NOTE(review): the queue is read without holding hMutex (the lock/unlock calls
+	// were commented out in the original) — confirm this is safe with the producer.
+	if (Iter > MaxIteration && VQueue.size() > 1)
+	{
+		update_body_data();
+		MaxIteration = 25;
+		Iter = 0;
+	}
+
+	simulator.simulate();
+	simulator.visulize();
+
+	if (!obj_vaos.empty())
+	{
+		// "i + 1 < size" avoids the unsigned underflow of "size - 1".
+		for (size_t i = 0; i + 1 < obj_vaos.size(); ++i)
+			RenderBuffer(obj_vaos[i]);
+
+		DoudouHead_Merge::Enabled = true;
+		RenderBuffer(obj_vaos.back());
+	}
+
+	Iter++;
+}
+// GLUT display callback: rebuilds the view transform from the current camera state,
+// captures the matrices used by the shader, renders the scene and swaps buffers.
+void Scene::onRender()
+{
+	getFPS();
+	glClear(GL_COLOR_BUFFER_BIT | GL_DEPTH_BUFFER_BIT);
+
+	// Build the view transform on the fixed-function matrix stack.
+	glLoadIdentity();
+	glTranslatef(0, dy, 0);
+	glTranslatef(0, 0, dist);
+	glRotatef(rX, 1, 0, 0);
+	glRotatef(rY, 0, 1, 0);
+
+	// Model matrix = identity: the diagonal entries sit at indices 0, 5, 10, 15.
+	for (int k = 0; k < 16; ++k)
+		model[k] = (k % 5 == 0) ? 1.0f : 0.0f;
+
+	glGetFloatv(GL_MODELVIEW_MATRIX, view);
+	glGetFloatv(GL_PROJECTION_MATRIX, projection);
+
+	// Viewing direction taken from the negated elements 2, 6, 10 of the view matrix.
+	viewDir = glm::vec3(-view[2], -view[6], -view[10]);
+	Right = glm::cross(viewDir, Up);
+
+	// Disabled debug drawing, kept for reference:
+	//   BVH:
+	//if (Scene::instance().h_bvh)
+	//{
+	//	Scene::instance().h_bvh->draw(Scene::instance().h_bvh->get_root());
+	//}
+	//   springs:
+	//Scene::instance().simulation->cuda_spring->draw();
+	//   collided vertices:
+	//Scene::instance().simulation->draw_collided_vertex();
+
+	Scene::instance().RenderGPU_CUDA();
+
+	glutSwapBuffers();
+}
+// GLUT reshape callback: records the new window size, resets the viewport and
+// rebuilds the perspective projection (30 degree field of view, near 0.1, far 100).
+//   nw, nh : new window width and height in pixels.
+// A height of zero (e.g. a collapsed window) is treated as 1 for the aspect ratio
+// so that the projection is never built from a division by zero.
+void Scene::OnReshape(int nw, int nh)
+{
+	current_width = nw;
+	current_height = nh;
+	glViewport(0, 0, nw, nh);
+
+	const int aspect_height = (nh > 0) ? nh : 1;
+
+	glMatrixMode(GL_PROJECTION);
+	glLoadIdentity();
+	gluPerspective(30, (GLfloat)nw / (GLfloat)aspect_height, 0.1f, 100.0f);
+
+	glMatrixMode(GL_MODELVIEW);
+	glLoadIdentity();
+
+	glutPostRedisplay();
+}
+
+// GLUT idle callback: requests a redraw so the scene is rendered continuously.
+void Scene::OnIdle()
+{
+	glutPostRedisplay();
+}
+
+// GLUT motion callback (mouse moved with a button held).
+//   x, y : current mouse position in window coordinates.
+// With no vertex selected, the drag zooms (state == 0) or rotates the view.
+// With a vertex selected, the drag direction picks the cursor shape and a moved
+// position for that vertex is computed from the bound array buffer.
+void Scene::OnMouseMove(int x, int y)
+{
+	if (selected_index == -1) {
+		if (state == 0)
+			dist *= (1 + (y - oldY) / 60.0f);
+		else
+		{
+			rY += (x - oldX) / 5.0f;
+			rX += (y - oldY) / 5.0f;
+		}
+	}
+	else {
+		float delta = 1500 / abs(dist);
+		float valX = (x - oldX) / delta;
+		float valY = (oldY - y) / delta;
+		if (abs(valX) > abs(valY))
+			glutSetCursor(GLUT_CURSOR_LEFT_RIGHT);
+		else
+			glutSetCursor(GLUT_CURSOR_UP_DOWN);
+
+		// Read the vertex only if the mapping succeeded; glMapBuffer returns NULL on
+		// failure, and the buffer must be unmapped only when it was actually mapped.
+		glm::vec4 *ptr = (glm::vec4*)glMapBuffer(GL_ARRAY_BUFFER, GL_READ_ONLY);
+		if (ptr) {
+			glm::vec4 moved = ptr[selected_index];
+			glUnmapBuffer(GL_ARRAY_BUFFER); // unmap it after use
+
+			moved.x += Right[0] * valX;
+
+			// The vertex is not allowed to drop to y <= 0.
+			const float candidate_y = moved.y + Up[1] * valY;
+			if (candidate_y > 0)
+				moved.y = candidate_y;
+			moved.z += Right[2] * valX + Up[2] * valY;
+
+			// NOTE(review): as in the original code, the moved position is computed
+			// but never written back to the buffer.
+			(void)moved;
+		}
+	}
+	oldX = x;
+	oldY = y;
+
+	glutPostRedisplay();
+}
+
+// GLUT mouse-button callback.
+//   button : GLUT button id; the middle button selects zoom mode, any other rotate.
+//   s      : GLUT_DOWN or GLUT_UP.
+//   x, y   : mouse position in window coordinates (origin at the top-left).
+// On press, the drag origin is recorded and the point under the cursor is
+// unprojected into object space. On release, any vertex selection is cleared.
+void Scene::OnMouseDown(int button, int s, int x, int y)
+{
+	if (s == GLUT_DOWN)
+	{
+		oldX = x;
+		oldY = y;
+
+		// Fetch the live matrices and viewport; gluUnProject must not be fed
+		// uninitialised arrays.
+		GLdouble MV1[16], P1[16];
+		glGetDoublev(GL_MODELVIEW_MATRIX, MV1);
+		glGetDoublev(GL_PROJECTION_MATRIX, P1);
+		glGetIntegerv(GL_VIEWPORT, viewport);
+
+		// OpenGL window coordinates start at the bottom-left, so flip y using the
+		// actual viewport height rather than the initial window size.
+		const int window_x = x;
+		const int window_y = viewport[3] - y;
+
+		float winZ = 0;
+		glReadPixels(window_x, window_y, 1, 1, GL_DEPTH_COMPONENT, GL_FLOAT, &winZ);
+		if (winZ == 1)
+			winZ = 0;
+		double objX = 0, objY = 0, objZ = 0;
+		gluUnProject(window_x, window_y, winZ, MV1, P1, viewport, &objX, &objY, &objZ);
+
+		// NOTE(review): as in the original code, the unprojected point is not used
+		// to select a vertex yet.
+		const glm::vec3 pt(objX, objY, objZ);
+		(void)pt;
+	}
+
+	if (button == GLUT_MIDDLE_BUTTON)
+		state = 0;
+	else
+		state = 1;
+
+	if (s == GLUT_UP) {
+		selected_index = -1;
+		glutSetCursor(GLUT_CURSOR_INHERIT);
+	}
+}
+
+// GLUT keyboard callback (mouse position arguments are unused).
+//   w / W : decrease the vertical view offset by 0.1
+//   s / S : increase the vertical view offset by 0.1
+//   x / X : save a screenshot
+//   m / M : cycle GL_MODE: GL_LINE_LOOP -> GL_TRIANGLES -> GL_POINTS -> GL_LINE_LOOP
+// Any other key is ignored. A redraw is requested in every case.
+void Scene::OnKey(unsigned char key, int, int)
+{
+	if (key == 'w' || key == 'W')
+	{
+		dy -= 0.1f;
+	}
+	else if (key == 's' || key == 'S')
+	{
+		dy += 0.1f;
+	}
+	else if (key == 'x' || key == 'X')
+	{
+		Scene::instance().screenshot();
+	}
+	else if (key == 'm' || key == 'M')
+	{
+		GL_MODE = (GL_MODE == GL_LINE_LOOP) ? GL_TRIANGLES
+			: (GL_MODE == GL_TRIANGLES) ? GL_POINTS
+			: GL_LINE_LOOP;
+	}
+
+	glutPostRedisplay();
+}
+
+// GLUT close callback; currently performs no cleanup.
+void Scene::OnShutdown()
+{
+
+}
+
+// Consumes the next queued body frame: hands it to the simulator, copies its
+// vertices and normals into the body's OpenGL array buffer through CUDA/GL interop,
+// recomputes the head transform and draws all objects once.
+// Does nothing when either queue is empty. The CUDA graphics resource is registered
+// for the duration of the copy only and unregistered afterwards, so repeated calls
+// do not accumulate registrations.
+// NOTE(review): VQueue/NQueue are accessed without holding hMutex (the lock calls
+// were commented out in the original) — confirm this is safe with the producer.
+void Scene::update_body_data()
+{
+	if (VQueue.empty() || NQueue.empty())
+		return;
+
+	// Start from the template (faces, texture coordinates, buffer handles) and
+	// replace positions and normals with the queued frame.
+	VAOMesh now_body = template_body;
+	now_body.vertices = VQueue.front();
+	now_body.normals = NQueue.front();
+
+	VQueue.pop();
+	NQueue.pop();
+
+	now_body.scale(0.3f);
+	now_body.translate(0.0f, 0.6f, 0.0f);
+
+	// Disabled in the original: shift the body vertically so that the lower of
+	// vertices 3367 / 6758 keeps the template's height.
+	//int index = (now_body.vertices[3367].y < now_body.vertices[6758].y) ? 3367 : 6758;
+	//float dy = now_body.vertices[index].y - Scene::instance().template_body.vertices[index].y;
+	//for (int i = 0;i < now_body.vertices.size();i++)
+	//	now_body.vertices[i].y -= dy;
+
+	update_simulating_body(now_body);
+
+	simulator.ccd();
+	simulator.visulize();
+
+	// Register the body's array buffer with CUDA (contents will be overwritten).
+	cudaError_t cudaStatus = cudaGraphicsGLRegisterBuffer(&body_vbo_resource, now_body.vbo.array_buffer, cudaGraphicsRegisterFlagsWriteDiscard);
+	if (cudaStatus != cudaSuccess)
+	{
+		fprintf(stderr, "register failed\n");
+	}
+	else
+	{
+		cudaStatus = cudaGraphicsMapResources(1, &body_vbo_resource, 0);
+		if (cudaStatus == cudaSuccess)
+		{
+			size_t num_bytes = 0;
+			cudaStatus = cudaGraphicsResourceGetMappedPointer((void **)&body_p_vertex, &num_bytes, body_vbo_resource);
+			if (cudaStatus == cudaSuccess)
+			{
+				// Buffer layout (see Scene::add): positions as vec4, then texture
+				// coordinates as vec2, then normals as vec3 — sized by the template.
+				const size_t template_vertices = this->template_body.vertices.size();
+				const size_t template_normals = this->template_body.normals.size();
+				body_p_normal = (glm::vec3*)((float*)body_p_vertex + 4 * template_vertices + 2 * this->template_body.texures.size());
+
+				// Never read more elements than the frame provides, nor write more
+				// than the buffer section holds.
+				const size_t vertex_count = (now_body.vertices.size() < template_vertices) ? now_body.vertices.size() : template_vertices;
+				const size_t normal_count = (now_body.normals.size() < template_normals) ? now_body.normals.size() : template_normals;
+
+				if (vertex_count > 0)
+					cudaStatus = cudaMemcpy(body_p_vertex, &now_body.vertices[0], sizeof(glm::vec4) * vertex_count, cudaMemcpyHostToDevice);
+				if (cudaStatus == cudaSuccess && normal_count > 0)
+					cudaStatus = cudaMemcpy(body_p_normal, &now_body.normals[0], sizeof(glm::vec3) * normal_count, cudaMemcpyHostToDevice);
+			}
+
+			const cudaError_t unmapStatus = cudaGraphicsUnmapResources(1, &body_vbo_resource, 0);
+			if (cudaStatus == cudaSuccess)
+				cudaStatus = unmapStatus;
+		}
+
+		if (cudaStatus != cudaSuccess)
+			fprintf(stderr, "body buffer update failed: %s\n", cudaGetErrorString(cudaStatus));
+
+		// Release the registration made above.
+		cudaGraphicsUnregisterResource(body_vbo_resource);
+	}
+
+	// Draw everything except the last object with the regular model matrix.
+	for (size_t i = 0; i + 1 < obj_vaos.size(); ++i)
+		RenderBuffer(obj_vaos[i]);
+
+	// Recompute the head transform from the new body; Calc() sets the flag that
+	// makes the next RenderBuffer call use it.
+	DoudouHead_Solver.Calc(now_body.vertices);
+	if (!obj_vaos.empty())
+		RenderBuffer(obj_vaos.back());
+}
+
diff --git a/scene.h b/scene.h
new file mode 100644
index 0000000..e23c6aa
--- /dev/null
+++ b/scene.h
@@ -0,0 +1,115 @@
+#pragma once
+
+#define GLEW_STATIC
+
+#include <vector>
+#include <armadillo>
+
+#include <cuda_runtime.h>
+
+#include "Simulator.h"
+#include "GLSLShader.h"
+#include "VAOMesh.h"
+
+class BVHAccel;
+
+
+using namespace arma;
+// Computes the transform that attaches the head mesh to the body, from two lists of
+// corresponding vertex indices loaded in Init().
+class DoudouHead_Merge
+{
+public:
+	// Column-major 4x4 matrix written by Calc(); read by Scene::RenderBuffer.
+	static GLfloat model[16];
+	// Set by Calc(); when true, the next Scene::RenderBuffer call uses `model`.
+	static bool Enabled;
+private:
+	mat Pts_Head;       // Len x 4: selected head points with a trailing column of ones
+	mat Pts_Head_Cor;   // head vertex indices loaded from Neck_IdxMerge.txt
+	mat Pts_Body;       // Len x 3: selected body points of the latest Calc() call
+	mat Pts_Body_Cor;   // body vertex indices loaded from Neck_IdxBody.txt
+	mat Ones;           // Len x 1 column of ones
+	uword Len;          // number of correspondences (element count of Pts_Body_Cor)
+public:
+	DoudouHead_Merge();
+	// Loads the index files and caches the head points.
+	void Init(Mesh& _Pts_Head_obj);
+	// Same as default construction followed by Init().
+	DoudouHead_Merge(Mesh& _Pts_Head_obj);
+	// Fits the head-to-body transform, stores it in `model` and returns `model`.
+	GLfloat *Calc(Vec4s& _Pts_Body_vec);
+};
+
+// Singleton that owns the window, the shader, the uploaded meshes and the simulator,
+// and hosts the GLUT callbacks.
+class Scene
+{
+public:
+	// Singleton accessor.
+	static Scene &instance();
+	~Scene() { }  
+
+	// Creates the GLUT window and initialises GLEW.
+	void initialize(int argc, char **argv); 
+		
+	//add objects,bind VAOs 
+	void add(VAOMesh& mesh);   
+
+	// Forward the given mesh to the simulator.
+	void update_simulating_cloth(Cloth &cloth);
+	void update_simulating_body(Mesh &body);
+
+	// Stores a copy of the body mesh; update_body_data() starts from this copy for
+	// every new frame.
+	void initiate_body_template(const VAOMesh &body)
+	{
+		template_body = body; 
+	}
+
+	// Registers the callbacks and runs the GLUT main loop.
+	void render();
+
+	// Consumes one queued body frame and uploads it to the GPU.
+	void update_body_data();
+private:
+	// Private: instances are only created through instance().
+	Scene() { }  
+
+	inline void check_GL_error();
+	void loadShader();
+
+private:
+	void screenshot();
+	void DrawGrid();                  // OpenGL helper: draws the reference grid
+	void RenderGPU_CUDA();
+
+
+	// GLUT callbacks.
+	static void onRender();
+	static void OnReshape(int nw, int nh);
+	static void OnIdle();
+	static void OnMouseMove(int x, int y);
+	static void OnMouseDown(int button, int s, int x, int y);
+	static void OnKey(unsigned char key, int, int);
+	static void OnShutdown();
+
+private:
+	VAOMesh template_body;
+
+	GLSLShader renderShader;
+	// Vertex attribute indices used when setting up the VAO in add().
+	enum attributes { position, texture, normal };
+
+	void RenderBuffer(VAO_Buffer vao_buffer);
+	// One entry per mesh passed to add(), in insertion order.
+	vector<VAO_Buffer> obj_vaos;
+
+	Simulator simulator;
+
+	// Address of the vertex section of the mapped OpenGL buffer.
+	glm::vec4 *body_p_vertex;       
+	// Address of the normal section of the mapped OpenGL buffer.
+	glm::vec3 *body_p_normal;           
+
+	cudaGraphicsResource *body_vbo_resource;
+
+	// Static view / interaction state shared by the GLUT callbacks.
+	static int oldX, oldY;    
+	static float rX, rY;
+	static int state;
+	static float dist, dy;
+	static GLint viewport[4];
+	static GLfloat view[16];
+	static GLfloat model[16];
+	static GLfloat projection[16];
+	static glm::vec3 Up, Right, viewDir;
+	static int selected_index;
+	// Initial window size in pixels.
+	static const int width = 1024, height = 1024;
+
+};
+
+
+
diff --git a/simulator.vcxproj b/simulator.vcxproj
new file mode 100644
index 0000000..1aef673
--- /dev/null
+++ b/simulator.vcxproj
@@ -0,0 +1,224 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project DefaultTargets="Build" ToolsVersion="14.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup Label="ProjectConfigurations">
+    <ProjectConfiguration Include="Debug|Win32">
+      <Configuration>Debug</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Debug|x64">
+      <Configuration>Debug</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|Win32">
+      <Configuration>Release</Configuration>
+      <Platform>Win32</Platform>
+    </ProjectConfiguration>
+    <ProjectConfiguration Include="Release|x64">
+      <Configuration>Release</Configuration>
+      <Platform>x64</Platform>
+    </ProjectConfiguration>
+  </ItemGroup>
+  <PropertyGroup Label="Globals">
+    <ProjectGuid>{8A537700-E52B-4CE2-A456-D47B6AD5B701}</ProjectGuid>
+    <Keyword>Win32Proj</Keyword>
+    <RootNamespace>simulator</RootNamespace>
+    <WindowsTargetPlatformVersion>8.1</WindowsTargetPlatformVersion>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.Default.props" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>true</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <CharacterSet>MultiByte</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="Configuration">
+    <ConfigurationType>Application</ConfigurationType>
+    <UseDebugLibraries>false</UseDebugLibraries>
+    <PlatformToolset>v140</PlatformToolset>
+    <WholeProgramOptimization>true</WholeProgramOptimization>
+    <CharacterSet>Unicode</CharacterSet>
+  </PropertyGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.props" />
+  <ImportGroup Label="ExtensionSettings">
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 8.0.props" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Label="PropertySheets" Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <ImportGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'" Label="PropertySheets">
+    <Import Project="$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props" Condition="exists('$(UserRootDir)\Microsoft.Cpp.$(Platform).user.props')" Label="LocalAppDataPlatform" />
+  </ImportGroup>
+  <PropertyGroup Label="UserMacros" />
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>C:\Program Files\Microsoft SDKs\Kinect\v2.0_1409\inc;D:\Program Files\Visual Studio2015\VC\include</IncludePath>
+    <LibraryPath>C:\Program Files\Microsoft SDKs\Kinect\v2.0_1409\Lib\x64;D:\Program Files\Visual Studio2015\VC\lib</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <LinkIncremental>true</LinkIncremental>
+    <IncludePath>C:\Program Files\Microsoft SDKs\Kinect\v2.0_1409\inc;D:\Program Files\Visual Studio2015\VC\include</IncludePath>
+    <LibraryPath>C:\Program Files\Microsoft SDKs\Kinect\v2.0_1409\Lib\x64;D:\Program Files\Visual Studio2015\VC\lib</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>C:\Program Files\Microsoft SDKs\Kinect\v2.0_1409\inc;D:\Program Files\Visual Studio2015\VC\include</IncludePath>
+    <LibraryPath>C:\Program Files\Microsoft SDKs\Kinect\v2.0_1409\Lib\x64;D:\Program Files\Visual Studio2015\VC\lib</LibraryPath>
+  </PropertyGroup>
+  <PropertyGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <LinkIncremental>false</LinkIncremental>
+    <IncludePath>C:\Program Files\Microsoft SDKs\Kinect\v2.0_1409\inc;D:\Program Files\Visual Studio2015\VC\include</IncludePath>
+    <LibraryPath>C:\Program Files\Microsoft SDKs\Kinect\v2.0_1409\Lib\x64;D:\Program Files\Visual Studio2015\VC\lib</LibraryPath>
+  </PropertyGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <CompileAs>Default</CompileAs>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>cudart.lib;FreeImage.lib;glew32s.lib;freeglut.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <IgnoreSpecificDefaultLibraries>libcmt.lib;</IgnoreSpecificDefaultLibraries>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">
+    <ClCompile>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <WarningLevel>Level3</WarningLevel>
+      <Optimization>Disabled</Optimization>
+      <PreprocessorDefinitions>WIN32;_DEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <CompileAs>Default</CompileAs>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <AdditionalDependencies>cudart.lib;FreeImage.lib;glew32s.lib;freeglut.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <IgnoreSpecificDefaultLibraries>libcmt.lib;</IgnoreSpecificDefaultLibraries>
+    </Link>
+    <CudaCompile>
+      <FastMath>true</FastMath>
+      <AdditionalOptions>
+ %(AdditionalOptions)</AdditionalOptions>
+      <AdditionalCompilerOptions>-Wno-deprecated-gpu-targets </AdditionalCompilerOptions>
+    </CudaCompile>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|Win32'">
+    <ClCompile>
+      <WarningLevel>Level3</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>cudart.lib;FreeImage.lib;glew32s.lib;freeglut.lib;%(AdditionalDependencies)</AdditionalDependencies>
+      <IgnoreSpecificDefaultLibraries>libcmt.lib;</IgnoreSpecificDefaultLibraries>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemDefinitionGroup Condition="'$(Configuration)|$(Platform)'=='Release|x64'">
+    <ClCompile>
+      <WarningLevel>Level2</WarningLevel>
+      <PrecompiledHeader>NotUsing</PrecompiledHeader>
+      <Optimization>MaxSpeed</Optimization>
+      <FunctionLevelLinking>true</FunctionLevelLinking>
+      <IntrinsicFunctions>true</IntrinsicFunctions>
+      <PreprocessorDefinitions>WIN32;NDEBUG;_CONSOLE;%(PreprocessorDefinitions)</PreprocessorDefinitions>
+      <SDLCheck>true</SDLCheck>
+      <RuntimeLibrary>MultiThreadedDLL</RuntimeLibrary>
+      <DisableSpecificWarnings>4819</DisableSpecificWarnings>
+    </ClCompile>
+    <Link>
+      <SubSystem>Console</SubSystem>
+      <GenerateDebugInformation>true</GenerateDebugInformation>
+      <EnableCOMDATFolding>true</EnableCOMDATFolding>
+      <OptimizeReferences>true</OptimizeReferences>
+      <AdditionalDependencies>cudart.lib;FreeImage.lib;glew32s.lib;freeglut.lib</AdditionalDependencies>
+      <IgnoreSpecificDefaultLibraries>libcmt.lib;</IgnoreSpecificDefaultLibraries>
+    </Link>
+  </ItemDefinitionGroup>
+  <ItemGroup>
+    <Text Include="ReadMe.txt" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="bvh\Bbox.h" />
+    <ClInclude Include="bvh\BRTreeNode.h" />
+    <ClInclude Include="bvh\BVHAccel.h" />
+    <ClInclude Include="bvh\BVHBuilder.h" />
+    <ClInclude Include="bvh\Primitive.h" />
+    <ClInclude Include="Cloth.h" />
+    <ClInclude Include="Common.h" />
+    <ClInclude Include="detectBody.h" />
+    <ClInclude Include="generateBody.h" />
+    <ClInclude Include="GLSLShader.h" />
+    <ClInclude Include="kdtree.h" />
+    <ClInclude Include="KinectJointFilter.h" />
+    <ClInclude Include="Mesh.h" />
+    <ClInclude Include="ObjLoader.h" />
+    <ClInclude Include="parameter.h" />
+    <ClInclude Include="scene.h" />
+    <ClInclude Include="Simulator.h" />
+    <ClInclude Include="SpringsBuilder.h" />
+    <ClInclude Include="Utilities.h" />
+    <ClInclude Include="Verlet.h" />
+    <ClInclude Include="VAOMesh.h" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="bvh\Bbox.cpp" />
+    <ClCompile Include="generateBody.cpp" />
+    <ClCompile Include="GLSLShader.cpp" />
+    <ClCompile Include="Mesh.cpp" />
+    <ClCompile Include="ObjLoader.cpp" />
+    <ClCompile Include="Utilities.cpp" />
+    <ClCompile Include="kdtree.cpp" />
+    <ClCompile Include="KinectJointFilter.cpp" />
+    <ClCompile Include="main.cpp" />
+    <ClCompile Include="parameter.cpp" />
+    <ClCompile Include="scene.cpp" />
+    <ClCompile Include="SpringsBuilder.cpp" />
+    <ClCompile Include="sRTSolver_MergeHead.cpp" />
+  </ItemGroup>
+  <ItemGroup>
+    <CudaCompile Include="bvh\BVHAccel.cpp" />
+    <CudaCompile Include="bvh\BVHBuilder.cu" />
+    <CudaCompile Include="Simulator.cu" />
+    <CudaCompile Include="verlet.cu">
+      <FileType>Document</FileType>
+      <GenerateRelocatableDeviceCode Condition="'$(Configuration)|$(Platform)'=='Debug|Win32'">false</GenerateRelocatableDeviceCode>
+      <GenerateRelocatableDeviceCode Condition="'$(Configuration)|$(Platform)'=='Debug|x64'">false</GenerateRelocatableDeviceCode>
+    </CudaCompile>
+  </ItemGroup>
+  <Import Project="$(VCTargetsPath)\Microsoft.Cpp.targets" />
+  <ImportGroup Label="ExtensionTargets">
+    <Import Project="$(VCTargetsPath)\BuildCustomizations\CUDA 8.0.targets" />
+  </ImportGroup>
+</Project>
\ No newline at end of file
diff --git a/simulator.vcxproj.filters b/simulator.vcxproj.filters
new file mode 100644
index 0000000..9bfc19a
--- /dev/null
+++ b/simulator.vcxproj.filters
@@ -0,0 +1,140 @@
+﻿<?xml version="1.0" encoding="utf-8"?>
+<Project ToolsVersion="4.0" xmlns="http://schemas.microsoft.com/developer/msbuild/2003">
+  <ItemGroup>
+    <Filter Include="源文件">
+      <UniqueIdentifier>{4FC737F1-C7A5-4376-A066-2A32D752A2FF}</UniqueIdentifier>
+      <Extensions>cpp;c;cc;cxx;def;odl;idl;hpj;bat;asm;asmx</Extensions>
+    </Filter>
+    <Filter Include="头文件">
+      <UniqueIdentifier>{93995380-89BD-4b04-88EB-625FBE52EBFB}</UniqueIdentifier>
+      <Extensions>h;hpp;hxx;hm;inl;inc;xsd</Extensions>
+    </Filter>
+    <Filter Include="资源文件">
+      <UniqueIdentifier>{67DA6AB6-F800-4c08-8B7A-83BB121AAD01}</UniqueIdentifier>
+      <Extensions>rc;ico;cur;bmp;dlg;rc2;rct;bin;rgs;gif;jpg;jpeg;jpe;resx;tiff;tif;png;wav;mfcribbon-ms</Extensions>
+    </Filter>
+  </ItemGroup>
+  <ItemGroup>
+    <Text Include="ReadMe.txt" />
+  </ItemGroup>
+  <ItemGroup>
+    <ClInclude Include="GLSLShader.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="scene.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="parameter.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="kdtree.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="generateBody.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="detectBody.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="KinectJointFilter.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="Utilities.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="bvh\BVHBuilder.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="Verlet.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="Simulator.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="SpringsBuilder.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="bvh\BVHAccel.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="bvh\BRTreeNode.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="bvh\Bbox.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="Common.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="bvh\Primitive.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="Mesh.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="ObjLoader.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="VAOMesh.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+    <ClInclude Include="Cloth.h">
+      <Filter>头文件</Filter>
+    </ClInclude>
+  </ItemGroup>
+  <ItemGroup>
+    <ClCompile Include="main.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="scene.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="GLSLShader.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="kdtree.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="generateBody.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="KinectJointFilter.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="sRTSolver_MergeHead.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="parameter.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="SpringsBuilder.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="bvh\Bbox.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="Utilities.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="Mesh.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+    <ClCompile Include="ObjLoader.cpp">
+      <Filter>源文件</Filter>
+    </ClCompile>
+  </ItemGroup>
+  <ItemGroup>
+    <CudaCompile Include="verlet.cu">
+      <Filter>源文件</Filter>
+    </CudaCompile>
+    <CudaCompile Include="bvh\BVHBuilder.cu">
+      <Filter>源文件</Filter>
+    </CudaCompile>
+    <CudaCompile Include="Simulator.cu">
+      <Filter>源文件</Filter>
+    </CudaCompile>
+    <CudaCompile Include="bvh\BVHAccel.cpp">
+      <Filter>源文件</Filter>
+    </CudaCompile>
+  </ItemGroup>
+</Project>
\ No newline at end of file
diff --git a/verlet.cu b/verlet.cu
new file mode 100644
index 0000000..14ba36a
--- /dev/null
+++ b/verlet.cu
@@ -0,0 +1,382 @@
+
+#include <iostream>
+
+#include <cuda.h>
+#include <device_functions.h>
+#include <cuda_runtime.h>
+#include <device_launch_parameters.h>
+
+#include <glm/glm.hpp>
+
+#include "Common.h"
+#include "parameter.h"
+#include "./bvh/BVHAccel.h"
+#include "Verlet.h"
+
+
+// Physics parameters (device-side globals). NOTE(review): the original comment was mis-encoded; it appears to say that any change here must also be made in parameter.cpp - confirm.
+__device__ float spring_structure = 30.0f;  // stiffness verlet() passes to get_spring_force for the adj_spring_st springs
+__device__ float spring_bend = 1.0f;  // stiffness verlet() passes to get_spring_force for the adj_spring_bd springs
+__device__ float damp = -0.05f;  // multiplied with the vertex velocity in verlet()'s force sum (original note was mis-encoded)
+__device__ float mass = 0.3f;  // per-vertex mass: scales gravity and divides the net force
+__device__ float g = 0.000981/3.0f;  // gravity magnitude, applied along -Y in verlet()
+__device__ float dt = 1.0f /25.0f;  // integration time step
+__device__ unsigned int NUM_PER_VERTEX_ADJ_FACES = 20;  // slots reserved per vertex in adj_vertex_to_faces (see show_vbo)
+__device__ unsigned int NUM_PER_VERTEX_SPRING_STRUCT = 20;  // slots reserved per vertex in adj_spring_st
+__device__ unsigned int NUM_PER_VERTEX_SPRING_BEND = 20;  // slots reserved per vertex in adj_spring_bd
+__device__ float ccd_coef = 0.1f;  // CCD blend coefficient used in ccd_response_projection; apparently meant to lie in 0~1 (original note was mis-encoded)
+__device__ float response = 0.001f;  // extra push-out distance added in collision_response_projection
+
+__device__ uint32 Mode = 4;    // ccd_response_projection strategy: 0 normal penalty; 1 weight center penalty; 2 projection penalty; 3 weighted penalty; 4 in-plane coordinates on the lstpri primitive re-applied to the curpri primitive
+///////////////////////////////////////////////////////
+// Penalty-force collision response for one vertex.
+// If the BVH query reports a primitive for pos_cur and the signed distance to
+// that primitive is negative, a force of 8 * |distance| along the reported
+// normal is added to `force` and pos_lst is overwritten with pos_cur.
+// Otherwise nothing is modified.
+__device__ void collision_response(
+	const BVHAccel &tree,
+	glm::vec3 &force, glm::vec3 &pos_cur, glm::vec3 &pos_lst)
+{
+	int hit_primitive;
+	if (!tree.intersect(pos_cur, hit_primitive))
+		return;
+
+	glm::vec3 hit_normal;
+	const float signed_dist = tree.curpri(hit_primitive).distance_to(pos_cur, hit_normal);
+	if (!(signed_dist < 0))
+		return;
+
+	// collision response with penalty force
+	const float penalty = 8.0 * glm::abs(signed_dist);
+	force += penalty * hit_normal;
+	pos_lst = pos_cur;
+}
+
+// Position-based collision response for one vertex.
+// When pos_cur has a negative signed distance to the primitive returned by the
+// BVH query, the vertex is moved along the reported normal by
+// |distance| + response, pos_lst is set to the corrected position, and the
+// normal is stored in dir_collision_force[idx]. In every other case
+// dir_collision_force[idx] is set to zero. `force` is not used.
+__device__ void collision_response_projection(
+	const BVHAccel &tree, 
+	glm::vec3 &force, glm::vec3 &pos_cur, glm::vec3 &pos_lst,
+	int idx, glm::vec3 *dir_collision_force)
+{
+	glm::vec3 contact_dir(0.0);
+
+	int hit_primitive;
+	if (tree.intersect(pos_cur, hit_primitive))
+	{
+		// NOTE(review): the original comment here was mis-encoded; it appears to
+		// ask whether the normal could be stored in the primitive instead of
+		// being recomputed on every call, and how that would affect GPU memory
+		// access speed - confirm.
+		glm::vec3 hit_normal;
+		const float signed_dist = tree.curpri(hit_primitive).distance_to(pos_cur, hit_normal);
+		if (signed_dist < 0)
+		{
+			// collision response with penalty position
+			pos_cur += (glm::abs(signed_dist) + response) * hit_normal;
+			pos_lst = pos_cur;
+			contact_dir = hit_normal;
+		}
+	}
+
+	dir_collision_force[idx] = contact_dir;
+}
+
+__device__ void ccd_response_projection(  // Repositions a vertex that has a negative signed distance to its coplanar-hit primitive, using the strategy selected by Mode.
+	const BVHAccel &tree,  // queried through coplanarIntersect(), curpri() and lstpri(); presumably current / previous primitive state - confirm
+	glm::vec3 &pos_cur, glm::vec3 &pos_lst,  // in/out: both are rewritten when a correction is applied
+	int idx, glm::vec3 *dir_collision_force)  // dir_collision_force[idx] is set to zero on every path
+{
+	int idx_pri;
+	//bool inter = intersect(leaf_nodes, internal_nodes, pos_cur, idx_pri);
+	//bool inter = nearestIntersect(leaf_nodes, internal_nodes, pos_cur, idx_pri);
+	bool inter = tree.coplanarIntersect(pos_cur, idx_pri);
+	if (inter)
+	{
+		glm::vec3 normal;
+		float dist = tree.curpri(idx_pri).distance_to(pos_cur, normal);
+		if (dist < 0)
+		{
+			pos_lst = pos_cur;  // remember the uncorrected position; blended toward the corrected one below
+			if (Mode==0)
+			{
+				dist = glm::abs(dist) + 0.02f;    //collision response with penalty displacement
+				pos_cur += dist * normal;  // push out along the reported normal by |dist| plus a 0.02 margin
+			}
+			else if(Mode == 1)	
+				pos_cur += tree.curpri(idx_pri).get_center() - pos_cur;  // net effect: pos_cur becomes the primitive's get_center()
+			else if(Mode == 2){
+				glm::vec3 PriToPoint = pos_cur - tree.lstpri(idx_pri).v0();
+				float d = glm::dot(PriToPoint , tree.lstpri(idx_pri).get_normal());  // offset of pos_cur from the lstpri plane along get_normal()
+				glm::vec3 ProjectInPri = pos_cur - d * tree.lstpri(idx_pri).get_normal();  // pos_cur with that offset removed
+				glm::vec3 dx0 = ProjectInPri - tree.lstpri(idx_pri).v0();  // offsets of the projected point from each lstpri vertex
+				glm::vec3 dx1 = ProjectInPri - tree.lstpri(idx_pri).v1();
+				glm::vec3 dx2 = ProjectInPri - tree.lstpri(idx_pri).v2();
+				pos_cur = dx0 + tree.curpri(idx_pri).v0()  // re-apply each offset to the matching curpri vertex
+					+ dx1 + tree.curpri(idx_pri).v1()
+					+ dx2 + tree.curpri(idx_pri).v2();
+				pos_cur /= 3.0f;  // average of the three candidates
+			}
+			else if (Mode == 3)
+			{
+				glm::vec3 dx0 = pos_cur - tree.lstpri(idx_pri).v0() ;  // same scheme as Mode 2 but without projecting pos_cur first
+				glm::vec3 dx1 = pos_cur - tree.lstpri(idx_pri).v1();
+				glm::vec3 dx2 = pos_cur - tree.lstpri(idx_pri).v2();
+				pos_cur = dx0 + tree.curpri(idx_pri).v0()
+					+ dx1 + tree.curpri(idx_pri).v1()
+					+ dx2 + tree.curpri(idx_pri).v2();
+				pos_cur /= 3.0f;
+			}
+
+			else if (Mode == 4) {
+				glm::vec3 PriToPoint = pos_cur - tree.lstpri(idx_pri).v0();
+				float distance = glm::dot(PriToPoint, tree.lstpri(idx_pri).get_normal());  // offset of pos_cur from the lstpri plane along get_normal()
+				glm::vec3 ProjectInPri = pos_cur - distance*tree.lstpri(idx_pri).get_normal();
+				float a = tree.lstpri(idx_pri).v1().x - tree.lstpri(idx_pri).v0().x;  // a..d: x/y components of the lstpri edges v1-v0 and v2-v0
+				float b = tree.lstpri(idx_pri).v2().x - tree.lstpri(idx_pri).v0().x;
+				float c = tree.lstpri(idx_pri).v1().y - tree.lstpri(idx_pri).v0().y;
+				float d = tree.lstpri(idx_pri).v2().y - tree.lstpri(idx_pri).v0().y;
+				float x = ProjectInPri.x - tree.lstpri(idx_pri).v0().x;
+				float y	= ProjectInPri.y - tree.lstpri(idx_pri).v0().y;
+				float lamda1 = (d * x - b * y) / (a * d - b * c);  // 2x2 solve of [a b; c d] * (lamda1, lamda2) = (x, y); only x and y are used
+				float lamda2 = (c * x - a * y) / (b * c - a * d);  // NOTE(review): a*d - b*c is 0 when the two edges are parallel in x/y, giving a division by zero - confirm this cannot occur
+
+				//cout<<lamda1<<','<<lamda2<<std::endl;
+				pos_cur = tree.curpri(idx_pri).v0()  // rebuild the point on the curpri primitive from the same edge coefficients
+					+ lamda1 * (tree.curpri(idx_pri).v1() - tree.curpri(idx_pri).v0())
+					+ lamda2 * (tree.curpri(idx_pri).v2() - tree.curpri(idx_pri).v0())
+					+ glm::abs(distance) * tree.curpri(idx_pri).get_normal();  // then offset by |distance| along the curpri normal
+			}
+			
+			pos_lst = pos_lst + (pos_cur - pos_lst) * (1.0f-ccd_coef);  // move pos_lst a fraction (1 - ccd_coef) of the way to the corrected position
+
+			dir_collision_force[idx] = glm::vec3(0.0);
+		}
+		else
+			dir_collision_force[idx] = glm::vec3(0.0);
+		/* Disabled alternative response, kept for reference:
+		tree.curpri(idx_pri).intersect(pos_cur, dist, normal);
+		dist = abs(dist);
+		pos_lst = pos_cur;
+		dist = dist + 0.001;    //collision response with penalty displacement
+		pos_cur += dist*normal;
+		pos_lst = pos_lst + (pos_cur - pos_lst)*(1.0f - ccd_coef);
+		dir_collision_force[idx] = glm::vec3(0.0);*/
+
+	}
+	else
+		dir_collision_force[idx] = glm::vec3(0.0);
+
+}
+
+// Debug helper: returns true when the BVH query reports a primitive for
+// pos_cur and the signed distance from pos_cur to that primitive is negative.
+// Nothing is modified; force, pos_lst, idx and dir_collision_force are unused.
+__device__ bool collide(
+	const BVHAccel &tree,
+	glm::vec3 &force, glm::vec3 &pos_cur, glm::vec3 &pos_lst,
+	int idx, glm::vec3 *dir_collision_force)
+{
+	int hit_primitive;
+	if (!tree.intersect(pos_cur, hit_primitive))
+		return false;
+
+	glm::vec3 hit_normal;
+	return tree.curpri(hit_primitive).distance_to(pos_cur, hit_normal) < 0;
+}
+
+// Kernel: one thread per face. Reads the face's three vertex indices from
+// adj_face_to_vertices (three consecutive entries per face), fetches those
+// positions from x_cur_in and writes the normalized cross product of the two
+// edges leaving the first vertex to dir_face_normals[face].
+__global__ void get_face_normal(
+	const unsigned int num_faces,
+	glm::vec3 *x_cur_in,
+	unsigned int *adj_face_to_vertices,
+	glm::vec3 *dir_face_normals)
+{
+	const unsigned int face = blockIdx.x * blockDim.x + threadIdx.x;
+	if (face >= num_faces)
+		return;
+
+	const unsigned int base = face * 3;
+	const glm::vec3 p0 = x_cur_in[adj_face_to_vertices[base]];
+	const glm::vec3 p1 = x_cur_in[adj_face_to_vertices[base + 1]];
+	const glm::vec3 p2 = x_cur_in[adj_face_to_vertices[base + 2]];
+
+	const glm::vec3 edge_a = p1 - p0;
+	const glm::vec3 edge_b = p2 - p0;
+
+	dir_face_normals[face] = glm::normalize(glm::cross(edge_a, edge_b));
+}
+
+// Sums the spring forces acting on vertex `index` from one adjacency table.
+//
+// adj_spring holds num_per_vertex_spring slots per vertex; the slots of vertex
+// `index` start at index * num_per_vertex_spring and a slot equal to UINT_MAX
+// terminates the list early. For every listed neighbour the force is
+//   (-k_spring * (dist - rest_length) + kd * dot(deltaV, deltaP) / dist) * normalize(deltaP)
+// where deltaP = pos_cur - neighbour position, rest_length is the distance
+// between the two vertices in x_orignal, and kd is currently 0. Neighbours that
+// coincide with pos_cur contribute nothing (the direction is undefined).
+//
+// Returns the accumulated force.
+__device__ glm::vec3 get_spring_force(
+	int index, 
+	glm::vec3 *x_cur_in, glm::vec3 *x_lst_in, glm::vec3 *x_orignal,
+	unsigned int *adj_spring, unsigned int num_per_vertex_spring, 
+	glm::vec3 pos_cur, glm::vec3 vel, float k_spring)
+{
+	const float ks = k_spring;
+	const float kd = 0;
+
+	glm::vec3 force(0.0);
+	const unsigned int first_neigh = index * num_per_vertex_spring;
+
+	// The slot bound is tested BEFORE adj_spring is read, so a vertex whose
+	// slots are all in use never reads the entry just past its own range
+	// (which lies outside the table for the last vertex).
+	for (unsigned int slot = 0; slot < num_per_vertex_spring; ++slot)
+	{
+		const unsigned int index_neigh = adj_spring[first_neigh + slot];
+		if (index_neigh == UINT_MAX)	// end-of-list marker
+			break;
+
+		const glm::vec3 p2_cur = x_cur_in[index_neigh];
+		const glm::vec3 p2_lst = x_lst_in[index_neigh];
+
+		const glm::vec3 deltaP = pos_cur - p2_cur;
+		const float dist = glm::length(deltaP);
+		if (dist == 0)	// avoid dividing by / normalizing a zero vector
+			continue;
+
+		const glm::vec3 v2 = (p2_cur - p2_lst) / dt;
+		const glm::vec3 deltaV = vel - v2;
+
+		const float original_length = glm::distance(x_orignal[index_neigh], x_orignal[index]);
+		const float leftTerm = -ks * (dist - original_length);
+		const float rightTerm = kd * (glm::dot(deltaV, deltaP) / dist);
+
+		force += (leftTerm + rightTerm) * glm::normalize(deltaP);
+	}
+	return force;
+
+}
+
+// Kernel: one thread per vertex. Copies the vertex position from x into
+// vbo_vertices (keeping the existing w component) and writes the vertex normal
+// to vbo_normals.
+//
+// The vertex normal is the normalized sum of the normals of the adjacent
+// faces. adj_vertex_to_faces holds NUM_PER_VERTEX_ADJ_FACES slots per vertex;
+// a slot equal to UINT_MAX terminates the list early.
+// NOTE(review): a vertex with no adjacent faces normalizes a zero vector -
+// confirm such vertices cannot occur.
+__global__ void show_vbo(
+	const unsigned int num_vertices,
+	glm::vec4 *vbo_vertices, glm::vec3 *vbo_normals,
+	glm::vec3 *x, unsigned int *adj_vertex_to_faces, glm::vec3 *dir_face_normals)
+{
+	unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
+	if (index >= num_vertices)
+		return;
+
+	const glm::vec3 pose = x[index];
+
+	// compute point normal
+	glm::vec3 normal(0.0);
+	const unsigned int first_face_index = index * NUM_PER_VERTEX_ADJ_FACES;
+
+	// The slot bound is tested BEFORE the table is read, so a vertex whose
+	// slots are all in use never reads the entry just past its own range
+	// (which lies outside the table for the last vertex).
+	for (unsigned int slot = 0; slot < NUM_PER_VERTEX_ADJ_FACES; ++slot)
+	{
+		const unsigned int findex = adj_vertex_to_faces[first_face_index + slot];
+		if (findex == UINT_MAX)	// end-of-list marker
+			break;
+		normal += dir_face_normals[findex];
+	}
+	normal = glm::normalize(normal);
+
+	//set new vertex and new normal
+	vbo_vertices[index] = glm::vec4(pose.x, pose.y, pose.z, vbo_vertices[index].w);
+	vbo_normals[index] = normal;
+}
+
+// Kernel: one thread per vertex. Advances the vertex by one Verlet step.
+//
+// Reads the current/previous positions from x_cur_in / x_lst_in, accumulates
+// gravity, damping and the structural and bend spring forces, removes the
+// force component along dir_collision_force[index] (the contact direction
+// stored by the previous collision pass), integrates, and then runs
+// collision_response_projection on the new position. Results are written to
+// x_cur_out / x_lst_out, so neighbour reads from the *_in buffers are not
+// affected by other threads' writes.
+//
+// With DEBUG_COLLISION defined, collided_vertex[index] is set to 1 when the
+// vertex is still reported as colliding after the response step, otherwise 0.
+__global__ void verlet(
+	const BVHAccel tree,
+	const unsigned int num_vertices,
+	glm::vec3 *x_cur_in, glm::vec3 *x_lst_in, glm::vec3 *x_cur_out, glm::vec3 *x_lst_out, glm::vec3 *x_orignal,
+	unsigned int *adj_spring_st, unsigned int *adj_spring_bd,
+	glm::vec3 *dir_collision_force
+#ifdef DEBUG_COLLISION
+	, int *collided_vertex
+#endif
+)
+{
+	unsigned int index = blockIdx.x * blockDim.x + threadIdx.x;
+	if (index >= num_vertices)
+		return;
+
+	glm::vec3 pos_cur = x_cur_in[index];
+	glm::vec3 pos_lst = x_lst_in[index];
+
+	glm::vec3 vel = (pos_cur - pos_lst) / dt;
+
+	const glm::vec3 gravity = glm::vec3(0.0f, -1.0 * g, 0.0f); //set gravity
+	glm::vec3 force = gravity * mass + vel * damp;
+	force += get_spring_force(index, x_cur_in, x_lst_in, x_orignal, adj_spring_st, NUM_PER_VERTEX_SPRING_STRUCT, pos_cur, vel, spring_structure); // structural springs
+	force += get_spring_force(index, x_cur_in, x_lst_in, x_orignal, adj_spring_bd, NUM_PER_VERTEX_SPRING_BEND, pos_cur, vel, spring_bend); // bend springs
+
+	// collision response force: if the vertex was in contact, drop the force
+	// component along the stored contact direction and keep the tangential part
+	glm::vec3 inelastic_force = glm::dot(dir_collision_force[index], force) * dir_collision_force[index];
+	//inelastic_force *= 0.5;
+	force -= inelastic_force;
+
+	glm::vec3 acc = force / mass;
+	glm::vec3 tmp = pos_cur;
+	pos_cur = pos_cur + pos_cur - pos_lst + acc * dt * dt;
+	pos_lst = tmp;
+
+	// At this point pos_lst holds the pre-step position. When
+	// collision_response_projection corrects the vertex it sets
+	// pos_lst = pos_cur, which discards the displacement produced by this step.
+	// NOTE(review): the original comment block here was mis-encoded; it appears
+	// to say this reset is deliberate (to prevent the vertex bouncing back after
+	// a collision) and to discuss its effect on the velocity estimate and on the
+	// in/out buffering - confirm.
+	collision_response_projection(tree, force, pos_cur, pos_lst, index, dir_collision_force);
+
+#ifdef DEBUG_COLLISION
+	// debug: record whether the vertex is still penetrating after the response
+	collided_vertex[index] = collide(tree, force, pos_cur, pos_lst, index, dir_collision_force) ? 1 : 0;
+#endif
+	x_cur_out[index] = pos_cur;
+	x_lst_out[index] = pos_lst;
+}
+
+// Kernel: one thread per vertex. Loads the vertex's current/previous position
+// from x_cur_in / x_lst_in, lets ccd_response_projection adjust them, and
+// stores the result in x_cur_out / x_lst_out. x_orignal is not used.
+__global__ void CCD(
+	const BVHAccel tree,
+	const unsigned int num_vertices,
+	glm::vec3 *x_cur_in, glm::vec3 *x_lst_in, glm::vec3 *x_cur_out, glm::vec3 *x_lst_out, glm::vec3 *x_orignal,
+	glm::vec3 *dir_collision_force)
+{
+	const unsigned int vertex = blockIdx.x * blockDim.x + threadIdx.x;
+	if (vertex < num_vertices)
+	{
+		glm::vec3 corrected_cur = x_cur_in[vertex];
+		glm::vec3 corrected_lst = x_lst_in[vertex];
+
+		ccd_response_projection(tree, corrected_cur, corrected_lst, vertex, dir_collision_force);
+
+		x_cur_out[vertex] = corrected_cur;
+		x_lst_out[vertex] = corrected_lst;
+	}
+}
diff --git a/wglew.h b/wglew.h
new file mode 100644
index 0000000..1779c74
--- /dev/null
+++ b/wglew.h
@@ -0,0 +1,1453 @@
+/*
+** The OpenGL Extension Wrangler Library
+** Copyright (C) 2008-2015, Nigel Stewart <nigels[]users sourceforge net>
+** Copyright (C) 2002-2008, Milan Ikits <milan ikits[]ieee org>
+** Copyright (C) 2002-2008, Marcelo E. Magallon <mmagallo[]debian org>
+** Copyright (C) 2002, Lev Povalahev
+** All rights reserved.
+** 
+** Redistribution and use in source and binary forms, with or without 
+** modification, are permitted provided that the following conditions are met:
+** 
+** * Redistributions of source code must retain the above copyright notice, 
+**   this list of conditions and the following disclaimer.
+** * Redistributions in binary form must reproduce the above copyright notice, 
+**   this list of conditions and the following disclaimer in the documentation 
+**   and/or other materials provided with the distribution.
+** * The name of the author may be used to endorse or promote products 
+**   derived from this software without specific prior written permission.
+**
+** THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" 
+** AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 
+** IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+** ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE 
+** LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 
+** CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 
+** SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
+** INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
+** CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
+** ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF
+** THE POSSIBILITY OF SUCH DAMAGE.
+*/
+
+/*
+** Copyright (c) 2007 The Khronos Group Inc.
+** 
+** Permission is hereby granted, free of charge, to any person obtaining a
+** copy of this software and/or associated documentation files (the
+** "Materials"), to deal in the Materials without restriction, including
+** without limitation the rights to use, copy, modify, merge, publish,
+** distribute, sublicense, and/or sell copies of the Materials, and to
+** permit persons to whom the Materials are furnished to do so, subject to
+** the following conditions:
+** 
+** The above copyright notice and this permission notice shall be included
+** in all copies or substantial portions of the Materials.
+** 
+** THE MATERIALS ARE PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
+** EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
+** MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+** IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
+** CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
+** TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
+** MATERIALS OR THE USE OR OTHER DEALINGS IN THE MATERIALS.
+*/
+
+#ifndef __wglew_h__
+#define __wglew_h__
+#define __WGLEW_H__
+
+#ifdef __wglext_h_
+#error wglext.h included before wglew.h
+#endif
+
+#define __wglext_h_
+
+#if !defined(WINAPI)
+#  ifndef WIN32_LEAN_AND_MEAN
+#    define WIN32_LEAN_AND_MEAN 1
+#  endif
+#include <windows.h>
+#  undef WIN32_LEAN_AND_MEAN
+#endif
+
+/*
+ * GLEW_STATIC needs to be set when using the static version.
+ * GLEW_BUILD is set when building the DLL version.
+ */
+#ifdef GLEW_STATIC
+#  define GLEWAPI extern
+#else
+#  ifdef GLEW_BUILD
+#    define GLEWAPI extern __declspec(dllexport)
+#  else
+#    define GLEWAPI extern __declspec(dllimport)
+#  endif
+#endif
+
+#ifdef __cplusplus
+extern "C" {
+#endif
+
+/* -------------------------- WGL_3DFX_multisample ------------------------- */
+
+#ifndef WGL_3DFX_multisample
+#define WGL_3DFX_multisample 1
+
+#define WGL_SAMPLE_BUFFERS_3DFX 0x2060
+#define WGL_SAMPLES_3DFX 0x2061
+
+#define WGLEW_3DFX_multisample WGLEW_GET_VAR(__WGLEW_3DFX_multisample)
+
+#endif /* WGL_3DFX_multisample */
+
+/* ------------------------- WGL_3DL_stereo_control ------------------------ */
+
+#ifndef WGL_3DL_stereo_control
+#define WGL_3DL_stereo_control 1
+
+#define WGL_STEREO_EMITTER_ENABLE_3DL 0x2055
+#define WGL_STEREO_EMITTER_DISABLE_3DL 0x2056
+#define WGL_STEREO_POLARITY_NORMAL_3DL 0x2057
+#define WGL_STEREO_POLARITY_INVERT_3DL 0x2058
+
+typedef BOOL (WINAPI * PFNWGLSETSTEREOEMITTERSTATE3DLPROC) (HDC hDC, UINT uState);
+
+#define wglSetStereoEmitterState3DL WGLEW_GET_FUN(__wglewSetStereoEmitterState3DL)
+
+#define WGLEW_3DL_stereo_control WGLEW_GET_VAR(__WGLEW_3DL_stereo_control)
+
+#endif /* WGL_3DL_stereo_control */
+
+/* ------------------------ WGL_AMD_gpu_association ------------------------ */
+
+#ifndef WGL_AMD_gpu_association
+#define WGL_AMD_gpu_association 1
+
+#define WGL_GPU_VENDOR_AMD 0x1F00
+#define WGL_GPU_RENDERER_STRING_AMD 0x1F01
+#define WGL_GPU_OPENGL_VERSION_STRING_AMD 0x1F02
+#define WGL_GPU_FASTEST_TARGET_GPUS_AMD 0x21A2
+#define WGL_GPU_RAM_AMD 0x21A3
+#define WGL_GPU_CLOCK_AMD 0x21A4
+#define WGL_GPU_NUM_PIPES_AMD 0x21A5
+#define WGL_GPU_NUM_SIMD_AMD 0x21A6
+#define WGL_GPU_NUM_RB_AMD 0x21A7
+#define WGL_GPU_NUM_SPI_AMD 0x21A8
+
+typedef VOID (WINAPI * PFNWGLBLITCONTEXTFRAMEBUFFERAMDPROC) (HGLRC dstCtx, GLint srcX0, GLint srcY0, GLint srcX1, GLint srcY1, GLint dstX0, GLint dstY0, GLint dstX1, GLint dstY1, GLbitfield mask, GLenum filter);
+typedef HGLRC (WINAPI * PFNWGLCREATEASSOCIATEDCONTEXTAMDPROC) (UINT id);
+typedef HGLRC (WINAPI * PFNWGLCREATEASSOCIATEDCONTEXTATTRIBSAMDPROC) (UINT id, HGLRC hShareContext, const int* attribList);
+typedef BOOL (WINAPI * PFNWGLDELETEASSOCIATEDCONTEXTAMDPROC) (HGLRC hglrc);
+typedef UINT (WINAPI * PFNWGLGETCONTEXTGPUIDAMDPROC) (HGLRC hglrc);
+typedef HGLRC (WINAPI * PFNWGLGETCURRENTASSOCIATEDCONTEXTAMDPROC) (void);
+typedef UINT (WINAPI * PFNWGLGETGPUIDSAMDPROC) (UINT maxCount, UINT* ids);
+typedef INT (WINAPI * PFNWGLGETGPUINFOAMDPROC) (UINT id, INT property, GLenum dataType, UINT size, void* data);
+typedef BOOL (WINAPI * PFNWGLMAKEASSOCIATEDCONTEXTCURRENTAMDPROC) (HGLRC hglrc);
+
+#define wglBlitContextFramebufferAMD WGLEW_GET_FUN(__wglewBlitContextFramebufferAMD)
+#define wglCreateAssociatedContextAMD WGLEW_GET_FUN(__wglewCreateAssociatedContextAMD)
+#define wglCreateAssociatedContextAttribsAMD WGLEW_GET_FUN(__wglewCreateAssociatedContextAttribsAMD)
+#define wglDeleteAssociatedContextAMD WGLEW_GET_FUN(__wglewDeleteAssociatedContextAMD)
+#define wglGetContextGPUIDAMD WGLEW_GET_FUN(__wglewGetContextGPUIDAMD)
+#define wglGetCurrentAssociatedContextAMD WGLEW_GET_FUN(__wglewGetCurrentAssociatedContextAMD)
+#define wglGetGPUIDsAMD WGLEW_GET_FUN(__wglewGetGPUIDsAMD)
+#define wglGetGPUInfoAMD WGLEW_GET_FUN(__wglewGetGPUInfoAMD)
+#define wglMakeAssociatedContextCurrentAMD WGLEW_GET_FUN(__wglewMakeAssociatedContextCurrentAMD)
+
+#define WGLEW_AMD_gpu_association WGLEW_GET_VAR(__WGLEW_AMD_gpu_association)
+
+#endif /* WGL_AMD_gpu_association */
+
+/* ------------------------- WGL_ARB_buffer_region ------------------------- */
+
+#ifndef WGL_ARB_buffer_region
+#define WGL_ARB_buffer_region 1
+
+#define WGL_FRONT_COLOR_BUFFER_BIT_ARB 0x00000001
+#define WGL_BACK_COLOR_BUFFER_BIT_ARB 0x00000002
+#define WGL_DEPTH_BUFFER_BIT_ARB 0x00000004
+#define WGL_STENCIL_BUFFER_BIT_ARB 0x00000008
+
+typedef HANDLE (WINAPI * PFNWGLCREATEBUFFERREGIONARBPROC) (HDC hDC, int iLayerPlane, UINT uType);
+typedef VOID (WINAPI * PFNWGLDELETEBUFFERREGIONARBPROC) (HANDLE hRegion);
+typedef BOOL (WINAPI * PFNWGLRESTOREBUFFERREGIONARBPROC) (HANDLE hRegion, int x, int y, int width, int height, int xSrc, int ySrc);
+typedef BOOL (WINAPI * PFNWGLSAVEBUFFERREGIONARBPROC) (HANDLE hRegion, int x, int y, int width, int height);
+
+#define wglCreateBufferRegionARB WGLEW_GET_FUN(__wglewCreateBufferRegionARB)
+#define wglDeleteBufferRegionARB WGLEW_GET_FUN(__wglewDeleteBufferRegionARB)
+#define wglRestoreBufferRegionARB WGLEW_GET_FUN(__wglewRestoreBufferRegionARB)
+#define wglSaveBufferRegionARB WGLEW_GET_FUN(__wglewSaveBufferRegionARB)
+
+#define WGLEW_ARB_buffer_region WGLEW_GET_VAR(__WGLEW_ARB_buffer_region)
+
+#endif /* WGL_ARB_buffer_region */
+
+/* --------------------- WGL_ARB_context_flush_control --------------------- */
+
+#ifndef WGL_ARB_context_flush_control
+#define WGL_ARB_context_flush_control 1
+
+#define WGL_CONTEXT_RELEASE_BEHAVIOR_NONE_ARB 0x0000
+#define WGL_CONTEXT_RELEASE_BEHAVIOR_ARB 0x2097
+#define WGL_CONTEXT_RELEASE_BEHAVIOR_FLUSH_ARB 0x2098
+
+#define WGLEW_ARB_context_flush_control WGLEW_GET_VAR(__WGLEW_ARB_context_flush_control)
+
+#endif /* WGL_ARB_context_flush_control */
+
+/* ------------------------- WGL_ARB_create_context ------------------------ */
+
+#ifndef WGL_ARB_create_context
+#define WGL_ARB_create_context 1
+
+#define WGL_CONTEXT_DEBUG_BIT_ARB 0x0001
+#define WGL_CONTEXT_FORWARD_COMPATIBLE_BIT_ARB 0x0002
+#define WGL_CONTEXT_MAJOR_VERSION_ARB 0x2091
+#define WGL_CONTEXT_MINOR_VERSION_ARB 0x2092
+#define WGL_CONTEXT_LAYER_PLANE_ARB 0x2093
+#define WGL_CONTEXT_FLAGS_ARB 0x2094
+#define ERROR_INVALID_VERSION_ARB 0x2095
+#define ERROR_INVALID_PROFILE_ARB 0x2096
+
+typedef HGLRC (WINAPI * PFNWGLCREATECONTEXTATTRIBSARBPROC) (HDC hDC, HGLRC hShareContext, const int* attribList);
+
+#define wglCreateContextAttribsARB WGLEW_GET_FUN(__wglewCreateContextAttribsARB)
+
+#define WGLEW_ARB_create_context WGLEW_GET_VAR(__WGLEW_ARB_create_context)
+
+#endif /* WGL_ARB_create_context */
+
+/* --------------------- WGL_ARB_create_context_profile -------------------- */
+
+#ifndef WGL_ARB_create_context_profile
+#define WGL_ARB_create_context_profile 1
+
+#define WGL_CONTEXT_CORE_PROFILE_BIT_ARB 0x00000001
+#define WGL_CONTEXT_COMPATIBILITY_PROFILE_BIT_ARB 0x00000002
+#define WGL_CONTEXT_PROFILE_MASK_ARB 0x9126
+
+#define WGLEW_ARB_create_context_profile WGLEW_GET_VAR(__WGLEW_ARB_create_context_profile)
+
+#endif /* WGL_ARB_create_context_profile */
+
+/* ------------------- WGL_ARB_create_context_robustness ------------------- */
+
+#ifndef WGL_ARB_create_context_robustness
+#define WGL_ARB_create_context_robustness 1
+
+#define WGL_CONTEXT_ROBUST_ACCESS_BIT_ARB 0x00000004
+#define WGL_LOSE_CONTEXT_ON_RESET_ARB 0x8252
+#define WGL_CONTEXT_RESET_NOTIFICATION_STRATEGY_ARB 0x8256
+#define WGL_NO_RESET_NOTIFICATION_ARB 0x8261
+
+#define WGLEW_ARB_create_context_robustness WGLEW_GET_VAR(__WGLEW_ARB_create_context_robustness)
+
+#endif /* WGL_ARB_create_context_robustness */
+
+/* ----------------------- WGL_ARB_extensions_string ----------------------- */
+
+#ifndef WGL_ARB_extensions_string
+#define WGL_ARB_extensions_string 1
+
+typedef const char* (WINAPI * PFNWGLGETEXTENSIONSSTRINGARBPROC) (HDC hdc);
+
+#define wglGetExtensionsStringARB WGLEW_GET_FUN(__wglewGetExtensionsStringARB)
+
+#define WGLEW_ARB_extensions_string WGLEW_GET_VAR(__WGLEW_ARB_extensions_string)
+
+#endif /* WGL_ARB_extensions_string */
+
+/* ------------------------ WGL_ARB_framebuffer_sRGB ----------------------- */
+
+#ifndef WGL_ARB_framebuffer_sRGB
+#define WGL_ARB_framebuffer_sRGB 1
+
+#define WGL_FRAMEBUFFER_SRGB_CAPABLE_ARB 0x20A9
+
+#define WGLEW_ARB_framebuffer_sRGB WGLEW_GET_VAR(__WGLEW_ARB_framebuffer_sRGB)
+
+#endif /* WGL_ARB_framebuffer_sRGB */
+
+/* ----------------------- WGL_ARB_make_current_read ----------------------- */
+
+#ifndef WGL_ARB_make_current_read
+#define WGL_ARB_make_current_read 1
+
+#define ERROR_INVALID_PIXEL_TYPE_ARB 0x2043
+#define ERROR_INCOMPATIBLE_DEVICE_CONTEXTS_ARB 0x2054
+
+typedef HDC (WINAPI * PFNWGLGETCURRENTREADDCARBPROC) (VOID);
+typedef BOOL (WINAPI * PFNWGLMAKECONTEXTCURRENTARBPROC) (HDC hDrawDC, HDC hReadDC, HGLRC hglrc);
+
+#define wglGetCurrentReadDCARB WGLEW_GET_FUN(__wglewGetCurrentReadDCARB)
+#define wglMakeContextCurrentARB WGLEW_GET_FUN(__wglewMakeContextCurrentARB)
+
+#define WGLEW_ARB_make_current_read WGLEW_GET_VAR(__WGLEW_ARB_make_current_read)
+
+#endif /* WGL_ARB_make_current_read */
+
+/* -------------------------- WGL_ARB_multisample -------------------------- */
+
+#ifndef WGL_ARB_multisample
+#define WGL_ARB_multisample 1
+
+#define WGL_SAMPLE_BUFFERS_ARB 0x2041
+#define WGL_SAMPLES_ARB 0x2042
+
+#define WGLEW_ARB_multisample WGLEW_GET_VAR(__WGLEW_ARB_multisample)
+
+#endif /* WGL_ARB_multisample */
+
+/* ---------------------------- WGL_ARB_pbuffer ---------------------------- */
+
+#ifndef WGL_ARB_pbuffer
+#define WGL_ARB_pbuffer 1
+
+#define WGL_DRAW_TO_PBUFFER_ARB 0x202D
+#define WGL_MAX_PBUFFER_PIXELS_ARB 0x202E
+#define WGL_MAX_PBUFFER_WIDTH_ARB 0x202F
+#define WGL_MAX_PBUFFER_HEIGHT_ARB 0x2030
+#define WGL_PBUFFER_LARGEST_ARB 0x2033
+#define WGL_PBUFFER_WIDTH_ARB 0x2034
+#define WGL_PBUFFER_HEIGHT_ARB 0x2035
+#define WGL_PBUFFER_LOST_ARB 0x2036
+
+DECLARE_HANDLE(HPBUFFERARB);
+
+typedef HPBUFFERARB (WINAPI * PFNWGLCREATEPBUFFERARBPROC) (HDC hDC, int iPixelFormat, int iWidth, int iHeight, const int* piAttribList);
+typedef BOOL (WINAPI * PFNWGLDESTROYPBUFFERARBPROC) (HPBUFFERARB hPbuffer);
+typedef HDC (WINAPI * PFNWGLGETPBUFFERDCARBPROC) (HPBUFFERARB hPbuffer);
+typedef BOOL (WINAPI * PFNWGLQUERYPBUFFERARBPROC) (HPBUFFERARB hPbuffer, int iAttribute, int* piValue);
+typedef int (WINAPI * PFNWGLRELEASEPBUFFERDCARBPROC) (HPBUFFERARB hPbuffer, HDC hDC);
+
+#define wglCreatePbufferARB WGLEW_GET_FUN(__wglewCreatePbufferARB)
+#define wglDestroyPbufferARB WGLEW_GET_FUN(__wglewDestroyPbufferARB)
+#define wglGetPbufferDCARB WGLEW_GET_FUN(__wglewGetPbufferDCARB)
+#define wglQueryPbufferARB WGLEW_GET_FUN(__wglewQueryPbufferARB)
+#define wglReleasePbufferDCARB WGLEW_GET_FUN(__wglewReleasePbufferDCARB)
+
+#define WGLEW_ARB_pbuffer WGLEW_GET_VAR(__WGLEW_ARB_pbuffer)
+
+#endif /* WGL_ARB_pbuffer */
+
+/* -------------------------- WGL_ARB_pixel_format ------------------------- */
+
+#ifndef WGL_ARB_pixel_format
+#define WGL_ARB_pixel_format 1 /* guard: once defined, the #ifndef above skips this whole section */
+/* Attribute tokens: values run 0x2000-0x202C (0x200B is not defined here), then 0x2037-0x203B. */
+#define WGL_NUMBER_PIXEL_FORMATS_ARB 0x2000
+#define WGL_DRAW_TO_WINDOW_ARB 0x2001
+#define WGL_DRAW_TO_BITMAP_ARB 0x2002
+#define WGL_ACCELERATION_ARB 0x2003
+#define WGL_NEED_PALETTE_ARB 0x2004
+#define WGL_NEED_SYSTEM_PALETTE_ARB 0x2005
+#define WGL_SWAP_LAYER_BUFFERS_ARB 0x2006
+#define WGL_SWAP_METHOD_ARB 0x2007
+#define WGL_NUMBER_OVERLAYS_ARB 0x2008
+#define WGL_NUMBER_UNDERLAYS_ARB 0x2009
+#define WGL_TRANSPARENT_ARB 0x200A
+#define WGL_SHARE_DEPTH_ARB 0x200C
+#define WGL_SHARE_STENCIL_ARB 0x200D
+#define WGL_SHARE_ACCUM_ARB 0x200E
+#define WGL_SUPPORT_GDI_ARB 0x200F
+#define WGL_SUPPORT_OPENGL_ARB 0x2010
+#define WGL_DOUBLE_BUFFER_ARB 0x2011
+#define WGL_STEREO_ARB 0x2012
+#define WGL_PIXEL_TYPE_ARB 0x2013
+#define WGL_COLOR_BITS_ARB 0x2014
+#define WGL_RED_BITS_ARB 0x2015
+#define WGL_RED_SHIFT_ARB 0x2016
+#define WGL_GREEN_BITS_ARB 0x2017
+#define WGL_GREEN_SHIFT_ARB 0x2018
+#define WGL_BLUE_BITS_ARB 0x2019
+#define WGL_BLUE_SHIFT_ARB 0x201A
+#define WGL_ALPHA_BITS_ARB 0x201B
+#define WGL_ALPHA_SHIFT_ARB 0x201C
+#define WGL_ACCUM_BITS_ARB 0x201D
+#define WGL_ACCUM_RED_BITS_ARB 0x201E
+#define WGL_ACCUM_GREEN_BITS_ARB 0x201F
+#define WGL_ACCUM_BLUE_BITS_ARB 0x2020
+#define WGL_ACCUM_ALPHA_BITS_ARB 0x2021
+#define WGL_DEPTH_BITS_ARB 0x2022
+#define WGL_STENCIL_BITS_ARB 0x2023
+#define WGL_AUX_BUFFERS_ARB 0x2024
+#define WGL_NO_ACCELERATION_ARB 0x2025
+#define WGL_GENERIC_ACCELERATION_ARB 0x2026
+#define WGL_FULL_ACCELERATION_ARB 0x2027
+#define WGL_SWAP_EXCHANGE_ARB 0x2028
+#define WGL_SWAP_COPY_ARB 0x2029
+#define WGL_SWAP_UNDEFINED_ARB 0x202A
+#define WGL_TYPE_RGBA_ARB 0x202B
+#define WGL_TYPE_COLORINDEX_ARB 0x202C
+#define WGL_TRANSPARENT_RED_VALUE_ARB 0x2037
+#define WGL_TRANSPARENT_GREEN_VALUE_ARB 0x2038
+#define WGL_TRANSPARENT_BLUE_VALUE_ARB 0x2039
+#define WGL_TRANSPARENT_ALPHA_VALUE_ARB 0x203A
+#define WGL_TRANSPARENT_INDEX_VALUE_ARB 0x203B
+/* Function-pointer types for the three entry points below; all input attribute arrays are const int*. */
+typedef BOOL (WINAPI * PFNWGLCHOOSEPIXELFORMATARBPROC) (HDC hdc, const int* piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, int *piFormats, UINT *nNumFormats);
+typedef BOOL (WINAPI * PFNWGLGETPIXELFORMATATTRIBFVARBPROC) (HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, const int* piAttributes, FLOAT *pfValues);
+typedef BOOL (WINAPI * PFNWGLGETPIXELFORMATATTRIBIVARBPROC) (HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, const int* piAttributes, int *piValues);
+/* Public wgl* names are macros over __wglew* symbols, wrapped by WGLEW_GET_FUN (defined outside this section). */
+#define wglChoosePixelFormatARB WGLEW_GET_FUN(__wglewChoosePixelFormatARB)
+#define wglGetPixelFormatAttribfvARB WGLEW_GET_FUN(__wglewGetPixelFormatAttribfvARB)
+#define wglGetPixelFormatAttribivARB WGLEW_GET_FUN(__wglewGetPixelFormatAttribivARB)
+
+#define WGLEW_ARB_pixel_format WGLEW_GET_VAR(__WGLEW_ARB_pixel_format) /* per-extension flag variable, read through WGLEW_GET_VAR */
+
+#endif /* WGL_ARB_pixel_format */
+
+/* ----------------------- WGL_ARB_pixel_format_float ---------------------- */
+
+#ifndef WGL_ARB_pixel_format_float
+#define WGL_ARB_pixel_format_float 1 /* guard: section is processed only once */
+
+#define WGL_TYPE_RGBA_FLOAT_ARB 0x21A0 /* the only token this section adds */
+
+#define WGLEW_ARB_pixel_format_float WGLEW_GET_VAR(__WGLEW_ARB_pixel_format_float) /* no entry points here, only the flag accessor */
+
+#endif /* WGL_ARB_pixel_format_float */
+
+/* ------------------------- WGL_ARB_render_texture ------------------------ */
+
+#ifndef WGL_ARB_render_texture
+#define WGL_ARB_render_texture 1 /* guard: section is processed only once */
+/* Tokens occupy the contiguous range 0x2070-0x2090. */
+#define WGL_BIND_TO_TEXTURE_RGB_ARB 0x2070
+#define WGL_BIND_TO_TEXTURE_RGBA_ARB 0x2071
+#define WGL_TEXTURE_FORMAT_ARB 0x2072
+#define WGL_TEXTURE_TARGET_ARB 0x2073
+#define WGL_MIPMAP_TEXTURE_ARB 0x2074
+#define WGL_TEXTURE_RGB_ARB 0x2075
+#define WGL_TEXTURE_RGBA_ARB 0x2076
+#define WGL_NO_TEXTURE_ARB 0x2077
+#define WGL_TEXTURE_CUBE_MAP_ARB 0x2078
+#define WGL_TEXTURE_1D_ARB 0x2079
+#define WGL_TEXTURE_2D_ARB 0x207A
+#define WGL_MIPMAP_LEVEL_ARB 0x207B
+#define WGL_CUBE_MAP_FACE_ARB 0x207C
+#define WGL_TEXTURE_CUBE_MAP_POSITIVE_X_ARB 0x207D
+#define WGL_TEXTURE_CUBE_MAP_NEGATIVE_X_ARB 0x207E
+#define WGL_TEXTURE_CUBE_MAP_POSITIVE_Y_ARB 0x207F
+#define WGL_TEXTURE_CUBE_MAP_NEGATIVE_Y_ARB 0x2080
+#define WGL_TEXTURE_CUBE_MAP_POSITIVE_Z_ARB 0x2081
+#define WGL_TEXTURE_CUBE_MAP_NEGATIVE_Z_ARB 0x2082
+#define WGL_FRONT_LEFT_ARB 0x2083
+#define WGL_FRONT_RIGHT_ARB 0x2084
+#define WGL_BACK_LEFT_ARB 0x2085
+#define WGL_BACK_RIGHT_ARB 0x2086
+#define WGL_AUX0_ARB 0x2087
+#define WGL_AUX1_ARB 0x2088
+#define WGL_AUX2_ARB 0x2089
+#define WGL_AUX3_ARB 0x208A
+#define WGL_AUX4_ARB 0x208B
+#define WGL_AUX5_ARB 0x208C
+#define WGL_AUX6_ARB 0x208D
+#define WGL_AUX7_ARB 0x208E
+#define WGL_AUX8_ARB 0x208F
+#define WGL_AUX9_ARB 0x2090
+/* HPBUFFERARB is not declared in this section; it must already be visible here (presumably from the WGL_ARB_pbuffer section - confirm). */
+typedef BOOL (WINAPI * PFNWGLBINDTEXIMAGEARBPROC) (HPBUFFERARB hPbuffer, int iBuffer);
+typedef BOOL (WINAPI * PFNWGLRELEASETEXIMAGEARBPROC) (HPBUFFERARB hPbuffer, int iBuffer);
+typedef BOOL (WINAPI * PFNWGLSETPBUFFERATTRIBARBPROC) (HPBUFFERARB hPbuffer, const int* piAttribList);
+
+#define wglBindTexImageARB WGLEW_GET_FUN(__wglewBindTexImageARB)
+#define wglReleaseTexImageARB WGLEW_GET_FUN(__wglewReleaseTexImageARB)
+#define wglSetPbufferAttribARB WGLEW_GET_FUN(__wglewSetPbufferAttribARB)
+
+#define WGLEW_ARB_render_texture WGLEW_GET_VAR(__WGLEW_ARB_render_texture) /* per-extension flag variable, read through WGLEW_GET_VAR */
+
+#endif /* WGL_ARB_render_texture */
+
+/* ---------------- WGL_ARB_robustness_application_isolation --------------- */
+
+#ifndef WGL_ARB_robustness_application_isolation
+#define WGL_ARB_robustness_application_isolation 1 /* guard: section is processed only once */
+
+#define WGL_CONTEXT_RESET_ISOLATION_BIT_ARB 0x00000008 /* also defined, with the same value, by the WGL_ARB_robustness_share_group_isolation section */
+
+#define WGLEW_ARB_robustness_application_isolation WGLEW_GET_VAR(__WGLEW_ARB_robustness_application_isolation)
+
+#endif /* WGL_ARB_robustness_application_isolation */
+
+/* ---------------- WGL_ARB_robustness_share_group_isolation --------------- */
+
+#ifndef WGL_ARB_robustness_share_group_isolation
+#define WGL_ARB_robustness_share_group_isolation 1 /* guard: section is processed only once */
+
+#define WGL_CONTEXT_RESET_ISOLATION_BIT_ARB 0x00000008 /* identical re-definition of the token from the application_isolation section; must stay 0x00000008 to remain a benign redefinition */
+
+#define WGLEW_ARB_robustness_share_group_isolation WGLEW_GET_VAR(__WGLEW_ARB_robustness_share_group_isolation)
+
+#endif /* WGL_ARB_robustness_share_group_isolation */
+
+/* ----------------------- WGL_ATI_pixel_format_float ---------------------- */
+
+#ifndef WGL_ATI_pixel_format_float
+#define WGL_ATI_pixel_format_float 1 /* guard: section is processed only once */
+
+#define WGL_TYPE_RGBA_FLOAT_ATI 0x21A0 /* same numeric value as WGL_TYPE_RGBA_FLOAT_ARB */
+#define GL_RGBA_FLOAT_MODE_ATI 0x8820 /* note the GL_ (not WGL_) prefix on this and the next token */
+#define GL_COLOR_CLEAR_UNCLAMPED_VALUE_ATI 0x8835
+
+#define WGLEW_ATI_pixel_format_float WGLEW_GET_VAR(__WGLEW_ATI_pixel_format_float)
+
+#endif /* WGL_ATI_pixel_format_float */
+
+/* -------------------- WGL_ATI_render_texture_rectangle ------------------- */
+
+#ifndef WGL_ATI_render_texture_rectangle
+#define WGL_ATI_render_texture_rectangle 1 /* guard: section is processed only once */
+
+#define WGL_TEXTURE_RECTANGLE_ATI 0x21A5 /* the only token this section adds */
+
+#define WGLEW_ATI_render_texture_rectangle WGLEW_GET_VAR(__WGLEW_ATI_render_texture_rectangle)
+
+#endif /* WGL_ATI_render_texture_rectangle */
+
+/* ------------------- WGL_EXT_create_context_es2_profile ------------------ */
+
+#ifndef WGL_EXT_create_context_es2_profile
+#define WGL_EXT_create_context_es2_profile 1 /* guard: section is processed only once */
+
+#define WGL_CONTEXT_ES2_PROFILE_BIT_EXT 0x00000004 /* same bit value as WGL_CONTEXT_ES_PROFILE_BIT_EXT in the es_profile section */
+
+#define WGLEW_EXT_create_context_es2_profile WGLEW_GET_VAR(__WGLEW_EXT_create_context_es2_profile)
+
+#endif /* WGL_EXT_create_context_es2_profile */
+
+/* ------------------- WGL_EXT_create_context_es_profile ------------------- */
+
+#ifndef WGL_EXT_create_context_es_profile
+#define WGL_EXT_create_context_es_profile 1 /* guard: section is processed only once */
+
+#define WGL_CONTEXT_ES_PROFILE_BIT_EXT 0x00000004 /* same bit value as WGL_CONTEXT_ES2_PROFILE_BIT_EXT in the es2_profile section */
+
+#define WGLEW_EXT_create_context_es_profile WGLEW_GET_VAR(__WGLEW_EXT_create_context_es_profile)
+
+#endif /* WGL_EXT_create_context_es_profile */
+
+/* -------------------------- WGL_EXT_depth_float -------------------------- */
+
+#ifndef WGL_EXT_depth_float
+#define WGL_EXT_depth_float 1 /* guard: section is processed only once */
+
+#define WGL_DEPTH_FLOAT_EXT 0x2040 /* the only token this section adds */
+
+#define WGLEW_EXT_depth_float WGLEW_GET_VAR(__WGLEW_EXT_depth_float)
+
+#endif /* WGL_EXT_depth_float */
+
+/* ---------------------- WGL_EXT_display_color_table ---------------------- */
+
+#ifndef WGL_EXT_display_color_table
+#define WGL_EXT_display_color_table 1 /* guard: section is processed only once */
+/* No tokens in this section. These signatures use GL types (GLboolean, GLushort, GLuint) rather than Win32 BOOL/UINT. */
+typedef GLboolean (WINAPI * PFNWGLBINDDISPLAYCOLORTABLEEXTPROC) (GLushort id);
+typedef GLboolean (WINAPI * PFNWGLCREATEDISPLAYCOLORTABLEEXTPROC) (GLushort id);
+typedef void (WINAPI * PFNWGLDESTROYDISPLAYCOLORTABLEEXTPROC) (GLushort id);
+typedef GLboolean (WINAPI * PFNWGLLOADDISPLAYCOLORTABLEEXTPROC) (GLushort* table, GLuint length);
+
+#define wglBindDisplayColorTableEXT WGLEW_GET_FUN(__wglewBindDisplayColorTableEXT)
+#define wglCreateDisplayColorTableEXT WGLEW_GET_FUN(__wglewCreateDisplayColorTableEXT)
+#define wglDestroyDisplayColorTableEXT WGLEW_GET_FUN(__wglewDestroyDisplayColorTableEXT)
+#define wglLoadDisplayColorTableEXT WGLEW_GET_FUN(__wglewLoadDisplayColorTableEXT)
+
+#define WGLEW_EXT_display_color_table WGLEW_GET_VAR(__WGLEW_EXT_display_color_table)
+
+#endif /* WGL_EXT_display_color_table */
+
+/* ----------------------- WGL_EXT_extensions_string ----------------------- */
+
+#ifndef WGL_EXT_extensions_string
+#define WGL_EXT_extensions_string 1 /* guard: section is processed only once */
+
+typedef const char* (WINAPI * PFNWGLGETEXTENSIONSSTRINGEXTPROC) (void); /* takes no arguments, returns a const C string */
+
+#define wglGetExtensionsStringEXT WGLEW_GET_FUN(__wglewGetExtensionsStringEXT)
+
+#define WGLEW_EXT_extensions_string WGLEW_GET_VAR(__WGLEW_EXT_extensions_string)
+
+#endif /* WGL_EXT_extensions_string */
+
+/* ------------------------ WGL_EXT_framebuffer_sRGB ----------------------- */
+
+#ifndef WGL_EXT_framebuffer_sRGB
+#define WGL_EXT_framebuffer_sRGB 1 /* guard: section is processed only once */
+
+#define WGL_FRAMEBUFFER_SRGB_CAPABLE_EXT 0x20A9 /* the only token this section adds */
+
+#define WGLEW_EXT_framebuffer_sRGB WGLEW_GET_VAR(__WGLEW_EXT_framebuffer_sRGB)
+
+#endif /* WGL_EXT_framebuffer_sRGB */
+
+/* ----------------------- WGL_EXT_make_current_read ----------------------- */
+
+#ifndef WGL_EXT_make_current_read
+#define WGL_EXT_make_current_read 1 /* guard: section is processed only once */
+
+#define ERROR_INVALID_PIXEL_TYPE_EXT 0x2043 /* note: this token carries no WGL_ prefix */
+
+typedef HDC (WINAPI * PFNWGLGETCURRENTREADDCEXTPROC) (VOID);
+typedef BOOL (WINAPI * PFNWGLMAKECONTEXTCURRENTEXTPROC) (HDC hDrawDC, HDC hReadDC, HGLRC hglrc); /* separate draw and read device contexts */
+
+#define wglGetCurrentReadDCEXT WGLEW_GET_FUN(__wglewGetCurrentReadDCEXT)
+#define wglMakeContextCurrentEXT WGLEW_GET_FUN(__wglewMakeContextCurrentEXT)
+
+#define WGLEW_EXT_make_current_read WGLEW_GET_VAR(__WGLEW_EXT_make_current_read)
+
+#endif /* WGL_EXT_make_current_read */
+
+/* -------------------------- WGL_EXT_multisample -------------------------- */
+
+#ifndef WGL_EXT_multisample
+#define WGL_EXT_multisample 1 /* guard: section is processed only once */
+
+#define WGL_SAMPLE_BUFFERS_EXT 0x2041
+#define WGL_SAMPLES_EXT 0x2042 /* WGL_COVERAGE_SAMPLES_NV in the NV_multisample_coverage section has this same value */
+
+#define WGLEW_EXT_multisample WGLEW_GET_VAR(__WGLEW_EXT_multisample)
+
+#endif /* WGL_EXT_multisample */
+
+/* ---------------------------- WGL_EXT_pbuffer ---------------------------- */
+
+#ifndef WGL_EXT_pbuffer
+#define WGL_EXT_pbuffer 1 /* guard: section is processed only once */
+/* Tokens occupy the contiguous range 0x202D-0x2035. */
+#define WGL_DRAW_TO_PBUFFER_EXT 0x202D
+#define WGL_MAX_PBUFFER_PIXELS_EXT 0x202E
+#define WGL_MAX_PBUFFER_WIDTH_EXT 0x202F
+#define WGL_MAX_PBUFFER_HEIGHT_EXT 0x2030
+#define WGL_OPTIMAL_PBUFFER_WIDTH_EXT 0x2031
+#define WGL_OPTIMAL_PBUFFER_HEIGHT_EXT 0x2032
+#define WGL_PBUFFER_LARGEST_EXT 0x2033
+#define WGL_PBUFFER_WIDTH_EXT 0x2034
+#define WGL_PBUFFER_HEIGHT_EXT 0x2035
+
+DECLARE_HANDLE(HPBUFFEREXT); /* handle type used by every typedef in this section; DECLARE_HANDLE itself comes from outside this file chunk */
+
+typedef HPBUFFEREXT (WINAPI * PFNWGLCREATEPBUFFEREXTPROC) (HDC hDC, int iPixelFormat, int iWidth, int iHeight, const int* piAttribList);
+typedef BOOL (WINAPI * PFNWGLDESTROYPBUFFEREXTPROC) (HPBUFFEREXT hPbuffer);
+typedef HDC (WINAPI * PFNWGLGETPBUFFERDCEXTPROC) (HPBUFFEREXT hPbuffer);
+typedef BOOL (WINAPI * PFNWGLQUERYPBUFFEREXTPROC) (HPBUFFEREXT hPbuffer, int iAttribute, int* piValue);
+typedef int (WINAPI * PFNWGLRELEASEPBUFFERDCEXTPROC) (HPBUFFEREXT hPbuffer, HDC hDC);
+/* Note that the release entry point above returns int, while the destroy and query entry points return BOOL. */
+#define wglCreatePbufferEXT WGLEW_GET_FUN(__wglewCreatePbufferEXT)
+#define wglDestroyPbufferEXT WGLEW_GET_FUN(__wglewDestroyPbufferEXT)
+#define wglGetPbufferDCEXT WGLEW_GET_FUN(__wglewGetPbufferDCEXT)
+#define wglQueryPbufferEXT WGLEW_GET_FUN(__wglewQueryPbufferEXT)
+#define wglReleasePbufferDCEXT WGLEW_GET_FUN(__wglewReleasePbufferDCEXT)
+
+#define WGLEW_EXT_pbuffer WGLEW_GET_VAR(__WGLEW_EXT_pbuffer)
+
+#endif /* WGL_EXT_pbuffer */
+
+/* -------------------------- WGL_EXT_pixel_format ------------------------- */
+
+#ifndef WGL_EXT_pixel_format
+#define WGL_EXT_pixel_format 1 /* guard: section is processed only once */
+/* Attribute tokens cover the contiguous range 0x2000-0x202C, including 0x200B (WGL_TRANSPARENT_VALUE_EXT). */
+#define WGL_NUMBER_PIXEL_FORMATS_EXT 0x2000
+#define WGL_DRAW_TO_WINDOW_EXT 0x2001
+#define WGL_DRAW_TO_BITMAP_EXT 0x2002
+#define WGL_ACCELERATION_EXT 0x2003
+#define WGL_NEED_PALETTE_EXT 0x2004
+#define WGL_NEED_SYSTEM_PALETTE_EXT 0x2005
+#define WGL_SWAP_LAYER_BUFFERS_EXT 0x2006
+#define WGL_SWAP_METHOD_EXT 0x2007
+#define WGL_NUMBER_OVERLAYS_EXT 0x2008
+#define WGL_NUMBER_UNDERLAYS_EXT 0x2009
+#define WGL_TRANSPARENT_EXT 0x200A
+#define WGL_TRANSPARENT_VALUE_EXT 0x200B
+#define WGL_SHARE_DEPTH_EXT 0x200C
+#define WGL_SHARE_STENCIL_EXT 0x200D
+#define WGL_SHARE_ACCUM_EXT 0x200E
+#define WGL_SUPPORT_GDI_EXT 0x200F
+#define WGL_SUPPORT_OPENGL_EXT 0x2010
+#define WGL_DOUBLE_BUFFER_EXT 0x2011
+#define WGL_STEREO_EXT 0x2012
+#define WGL_PIXEL_TYPE_EXT 0x2013
+#define WGL_COLOR_BITS_EXT 0x2014
+#define WGL_RED_BITS_EXT 0x2015
+#define WGL_RED_SHIFT_EXT 0x2016
+#define WGL_GREEN_BITS_EXT 0x2017
+#define WGL_GREEN_SHIFT_EXT 0x2018
+#define WGL_BLUE_BITS_EXT 0x2019
+#define WGL_BLUE_SHIFT_EXT 0x201A
+#define WGL_ALPHA_BITS_EXT 0x201B
+#define WGL_ALPHA_SHIFT_EXT 0x201C
+#define WGL_ACCUM_BITS_EXT 0x201D
+#define WGL_ACCUM_RED_BITS_EXT 0x201E
+#define WGL_ACCUM_GREEN_BITS_EXT 0x201F
+#define WGL_ACCUM_BLUE_BITS_EXT 0x2020
+#define WGL_ACCUM_ALPHA_BITS_EXT 0x2021
+#define WGL_DEPTH_BITS_EXT 0x2022
+#define WGL_STENCIL_BITS_EXT 0x2023
+#define WGL_AUX_BUFFERS_EXT 0x2024
+#define WGL_NO_ACCELERATION_EXT 0x2025
+#define WGL_GENERIC_ACCELERATION_EXT 0x2026
+#define WGL_FULL_ACCELERATION_EXT 0x2027
+#define WGL_SWAP_EXCHANGE_EXT 0x2028
+#define WGL_SWAP_COPY_EXT 0x2029
+#define WGL_SWAP_UNDEFINED_EXT 0x202A
+#define WGL_TYPE_RGBA_EXT 0x202B
+#define WGL_TYPE_COLORINDEX_EXT 0x202C
+/* Unlike the ARB typedefs, the two GetPixelFormatAttrib*EXT types take piAttributes as non-const int*. */
+typedef BOOL (WINAPI * PFNWGLCHOOSEPIXELFORMATEXTPROC) (HDC hdc, const int* piAttribIList, const FLOAT *pfAttribFList, UINT nMaxFormats, int *piFormats, UINT *nNumFormats);
+typedef BOOL (WINAPI * PFNWGLGETPIXELFORMATATTRIBFVEXTPROC) (HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, int* piAttributes, FLOAT *pfValues);
+typedef BOOL (WINAPI * PFNWGLGETPIXELFORMATATTRIBIVEXTPROC) (HDC hdc, int iPixelFormat, int iLayerPlane, UINT nAttributes, int* piAttributes, int *piValues);
+
+#define wglChoosePixelFormatEXT WGLEW_GET_FUN(__wglewChoosePixelFormatEXT)
+#define wglGetPixelFormatAttribfvEXT WGLEW_GET_FUN(__wglewGetPixelFormatAttribfvEXT)
+#define wglGetPixelFormatAttribivEXT WGLEW_GET_FUN(__wglewGetPixelFormatAttribivEXT)
+
+#define WGLEW_EXT_pixel_format WGLEW_GET_VAR(__WGLEW_EXT_pixel_format)
+
+#endif /* WGL_EXT_pixel_format */
+
+/* ------------------- WGL_EXT_pixel_format_packed_float ------------------- */
+
+#ifndef WGL_EXT_pixel_format_packed_float
+#define WGL_EXT_pixel_format_packed_float 1 /* guard: section is processed only once */
+
+#define WGL_TYPE_RGBA_UNSIGNED_FLOAT_EXT 0x20A8 /* the only token this section adds */
+
+#define WGLEW_EXT_pixel_format_packed_float WGLEW_GET_VAR(__WGLEW_EXT_pixel_format_packed_float)
+
+#endif /* WGL_EXT_pixel_format_packed_float */
+
+/* -------------------------- WGL_EXT_swap_control ------------------------- */
+
+#ifndef WGL_EXT_swap_control
+#define WGL_EXT_swap_control 1 /* guard: section is processed only once */
+/* No tokens; a getter returning int and a setter taking int and returning BOOL. */
+typedef int (WINAPI * PFNWGLGETSWAPINTERVALEXTPROC) (void);
+typedef BOOL (WINAPI * PFNWGLSWAPINTERVALEXTPROC) (int interval);
+
+#define wglGetSwapIntervalEXT WGLEW_GET_FUN(__wglewGetSwapIntervalEXT)
+#define wglSwapIntervalEXT WGLEW_GET_FUN(__wglewSwapIntervalEXT)
+
+#define WGLEW_EXT_swap_control WGLEW_GET_VAR(__WGLEW_EXT_swap_control)
+
+#endif /* WGL_EXT_swap_control */
+
+/* ----------------------- WGL_EXT_swap_control_tear ----------------------- */
+
+#ifndef WGL_EXT_swap_control_tear
+#define WGL_EXT_swap_control_tear 1 /* guard: section is processed only once */
+
+#define WGLEW_EXT_swap_control_tear WGLEW_GET_VAR(__WGLEW_EXT_swap_control_tear) /* this section adds no tokens or entry points, only the flag accessor */
+
+#endif /* WGL_EXT_swap_control_tear */
+
+/* --------------------- WGL_I3D_digital_video_control --------------------- */
+
+#ifndef WGL_I3D_digital_video_control
+#define WGL_I3D_digital_video_control 1 /* guard: section is processed only once */
+/* Four attribute tokens, 0x2050-0x2053, passed as iAttribute to the get/set entry points below (presumably - confirm against the extension spec). */
+#define WGL_DIGITAL_VIDEO_CURSOR_ALPHA_FRAMEBUFFER_I3D 0x2050
+#define WGL_DIGITAL_VIDEO_CURSOR_ALPHA_VALUE_I3D 0x2051
+#define WGL_DIGITAL_VIDEO_CURSOR_INCLUDED_I3D 0x2052
+#define WGL_DIGITAL_VIDEO_GAMMA_CORRECTED_I3D 0x2053
+
+typedef BOOL (WINAPI * PFNWGLGETDIGITALVIDEOPARAMETERSI3DPROC) (HDC hDC, int iAttribute, int* piValue);
+typedef BOOL (WINAPI * PFNWGLSETDIGITALVIDEOPARAMETERSI3DPROC) (HDC hDC, int iAttribute, const int* piValue);
+
+#define wglGetDigitalVideoParametersI3D WGLEW_GET_FUN(__wglewGetDigitalVideoParametersI3D)
+#define wglSetDigitalVideoParametersI3D WGLEW_GET_FUN(__wglewSetDigitalVideoParametersI3D)
+
+#define WGLEW_I3D_digital_video_control WGLEW_GET_VAR(__WGLEW_I3D_digital_video_control)
+
+#endif /* WGL_I3D_digital_video_control */
+
+/* ----------------------------- WGL_I3D_gamma ----------------------------- */
+
+#ifndef WGL_I3D_gamma
+#define WGL_I3D_gamma 1 /* guard: section is processed only once */
+
+#define WGL_GAMMA_TABLE_SIZE_I3D 0x204E
+#define WGL_GAMMA_EXCLUDE_DESKTOP_I3D 0x204F
+/* Getter types take writable pointers; the matching setter types take const pointers. */
+typedef BOOL (WINAPI * PFNWGLGETGAMMATABLEI3DPROC) (HDC hDC, int iEntries, USHORT* puRed, USHORT *puGreen, USHORT *puBlue);
+typedef BOOL (WINAPI * PFNWGLGETGAMMATABLEPARAMETERSI3DPROC) (HDC hDC, int iAttribute, int* piValue);
+typedef BOOL (WINAPI * PFNWGLSETGAMMATABLEI3DPROC) (HDC hDC, int iEntries, const USHORT* puRed, const USHORT *puGreen, const USHORT *puBlue);
+typedef BOOL (WINAPI * PFNWGLSETGAMMATABLEPARAMETERSI3DPROC) (HDC hDC, int iAttribute, const int* piValue);
+
+#define wglGetGammaTableI3D WGLEW_GET_FUN(__wglewGetGammaTableI3D)
+#define wglGetGammaTableParametersI3D WGLEW_GET_FUN(__wglewGetGammaTableParametersI3D)
+#define wglSetGammaTableI3D WGLEW_GET_FUN(__wglewSetGammaTableI3D)
+#define wglSetGammaTableParametersI3D WGLEW_GET_FUN(__wglewSetGammaTableParametersI3D)
+
+#define WGLEW_I3D_gamma WGLEW_GET_VAR(__WGLEW_I3D_gamma)
+
+#endif /* WGL_I3D_gamma */
+
+/* ---------------------------- WGL_I3D_genlock ---------------------------- */
+
+#ifndef WGL_I3D_genlock
+#define WGL_I3D_genlock 1 /* guard: section is processed only once */
+/* Tokens occupy the contiguous range 0x2044-0x204C: six SOURCE_* values followed by three SOURCE_EDGE_* values. */
+#define WGL_GENLOCK_SOURCE_MULTIVIEW_I3D 0x2044
+#define WGL_GENLOCK_SOURCE_EXTERNAL_SYNC_I3D 0x2045
+#define WGL_GENLOCK_SOURCE_EXTERNAL_FIELD_I3D 0x2046
+#define WGL_GENLOCK_SOURCE_EXTERNAL_TTL_I3D 0x2047
+#define WGL_GENLOCK_SOURCE_DIGITAL_SYNC_I3D 0x2048
+#define WGL_GENLOCK_SOURCE_DIGITAL_FIELD_I3D 0x2049
+#define WGL_GENLOCK_SOURCE_EDGE_FALLING_I3D 0x204A
+#define WGL_GENLOCK_SOURCE_EDGE_RISING_I3D 0x204B
+#define WGL_GENLOCK_SOURCE_EDGE_BOTH_I3D 0x204C
+/* Every entry point takes the HDC first; setters take UINT by value, the Get* counterparts take UINT*. */
+typedef BOOL (WINAPI * PFNWGLDISABLEGENLOCKI3DPROC) (HDC hDC);
+typedef BOOL (WINAPI * PFNWGLENABLEGENLOCKI3DPROC) (HDC hDC);
+typedef BOOL (WINAPI * PFNWGLGENLOCKSAMPLERATEI3DPROC) (HDC hDC, UINT uRate);
+typedef BOOL (WINAPI * PFNWGLGENLOCKSOURCEDELAYI3DPROC) (HDC hDC, UINT uDelay);
+typedef BOOL (WINAPI * PFNWGLGENLOCKSOURCEEDGEI3DPROC) (HDC hDC, UINT uEdge);
+typedef BOOL (WINAPI * PFNWGLGENLOCKSOURCEI3DPROC) (HDC hDC, UINT uSource);
+typedef BOOL (WINAPI * PFNWGLGETGENLOCKSAMPLERATEI3DPROC) (HDC hDC, UINT* uRate);
+typedef BOOL (WINAPI * PFNWGLGETGENLOCKSOURCEDELAYI3DPROC) (HDC hDC, UINT* uDelay);
+typedef BOOL (WINAPI * PFNWGLGETGENLOCKSOURCEEDGEI3DPROC) (HDC hDC, UINT* uEdge);
+typedef BOOL (WINAPI * PFNWGLGETGENLOCKSOURCEI3DPROC) (HDC hDC, UINT* uSource);
+typedef BOOL (WINAPI * PFNWGLISENABLEDGENLOCKI3DPROC) (HDC hDC, BOOL* pFlag);
+typedef BOOL (WINAPI * PFNWGLQUERYGENLOCKMAXSOURCEDELAYI3DPROC) (HDC hDC, UINT* uMaxLineDelay, UINT *uMaxPixelDelay);
+
+#define wglDisableGenlockI3D WGLEW_GET_FUN(__wglewDisableGenlockI3D)
+#define wglEnableGenlockI3D WGLEW_GET_FUN(__wglewEnableGenlockI3D)
+#define wglGenlockSampleRateI3D WGLEW_GET_FUN(__wglewGenlockSampleRateI3D)
+#define wglGenlockSourceDelayI3D WGLEW_GET_FUN(__wglewGenlockSourceDelayI3D)
+#define wglGenlockSourceEdgeI3D WGLEW_GET_FUN(__wglewGenlockSourceEdgeI3D)
+#define wglGenlockSourceI3D WGLEW_GET_FUN(__wglewGenlockSourceI3D)
+#define wglGetGenlockSampleRateI3D WGLEW_GET_FUN(__wglewGetGenlockSampleRateI3D)
+#define wglGetGenlockSourceDelayI3D WGLEW_GET_FUN(__wglewGetGenlockSourceDelayI3D)
+#define wglGetGenlockSourceEdgeI3D WGLEW_GET_FUN(__wglewGetGenlockSourceEdgeI3D)
+#define wglGetGenlockSourceI3D WGLEW_GET_FUN(__wglewGetGenlockSourceI3D)
+#define wglIsEnabledGenlockI3D WGLEW_GET_FUN(__wglewIsEnabledGenlockI3D)
+#define wglQueryGenlockMaxSourceDelayI3D WGLEW_GET_FUN(__wglewQueryGenlockMaxSourceDelayI3D)
+
+#define WGLEW_I3D_genlock WGLEW_GET_VAR(__WGLEW_I3D_genlock)
+
+#endif /* WGL_I3D_genlock */
+
+/* -------------------------- WGL_I3D_image_buffer ------------------------- */
+
+#ifndef WGL_I3D_image_buffer
+#define WGL_I3D_image_buffer 1 /* guard: section is processed only once */
+
+#define WGL_IMAGE_BUFFER_MIN_ACCESS_I3D 0x00000001 /* single-bit values (0x1, 0x2); presumably OR-able into uFlags - confirm against the extension spec */
+#define WGL_IMAGE_BUFFER_LOCK_I3D 0x00000002
+/* Buffers are addressed as LPVOID; the Associate/Release types take arrays plus an element count. */
+typedef BOOL (WINAPI * PFNWGLASSOCIATEIMAGEBUFFEREVENTSI3DPROC) (HDC hdc, HANDLE* pEvent, LPVOID *pAddress, DWORD *pSize, UINT count);
+typedef LPVOID (WINAPI * PFNWGLCREATEIMAGEBUFFERI3DPROC) (HDC hDC, DWORD dwSize, UINT uFlags);
+typedef BOOL (WINAPI * PFNWGLDESTROYIMAGEBUFFERI3DPROC) (HDC hDC, LPVOID pAddress);
+typedef BOOL (WINAPI * PFNWGLRELEASEIMAGEBUFFEREVENTSI3DPROC) (HDC hdc, LPVOID* pAddress, UINT count);
+
+#define wglAssociateImageBufferEventsI3D WGLEW_GET_FUN(__wglewAssociateImageBufferEventsI3D)
+#define wglCreateImageBufferI3D WGLEW_GET_FUN(__wglewCreateImageBufferI3D)
+#define wglDestroyImageBufferI3D WGLEW_GET_FUN(__wglewDestroyImageBufferI3D)
+#define wglReleaseImageBufferEventsI3D WGLEW_GET_FUN(__wglewReleaseImageBufferEventsI3D)
+
+#define WGLEW_I3D_image_buffer WGLEW_GET_VAR(__WGLEW_I3D_image_buffer)
+
+#endif /* WGL_I3D_image_buffer */
+
+/* ------------------------ WGL_I3D_swap_frame_lock ------------------------ */
+
+#ifndef WGL_I3D_swap_frame_lock
+#define WGL_I3D_swap_frame_lock 1 /* guard: section is processed only once */
+/* No tokens; none of these entry points takes an HDC. */
+typedef BOOL (WINAPI * PFNWGLDISABLEFRAMELOCKI3DPROC) (VOID);
+typedef BOOL (WINAPI * PFNWGLENABLEFRAMELOCKI3DPROC) (VOID);
+typedef BOOL (WINAPI * PFNWGLISENABLEDFRAMELOCKI3DPROC) (BOOL* pFlag);
+typedef BOOL (WINAPI * PFNWGLQUERYFRAMELOCKMASTERI3DPROC) (BOOL* pFlag);
+
+#define wglDisableFrameLockI3D WGLEW_GET_FUN(__wglewDisableFrameLockI3D)
+#define wglEnableFrameLockI3D WGLEW_GET_FUN(__wglewEnableFrameLockI3D)
+#define wglIsEnabledFrameLockI3D WGLEW_GET_FUN(__wglewIsEnabledFrameLockI3D)
+#define wglQueryFrameLockMasterI3D WGLEW_GET_FUN(__wglewQueryFrameLockMasterI3D)
+
+#define WGLEW_I3D_swap_frame_lock WGLEW_GET_VAR(__WGLEW_I3D_swap_frame_lock)
+
+#endif /* WGL_I3D_swap_frame_lock */
+
+/* ------------------------ WGL_I3D_swap_frame_usage ----------------------- */
+
+#ifndef WGL_I3D_swap_frame_usage
+#define WGL_I3D_swap_frame_usage 1 /* guard: section is processed only once */
+/* No tokens; results are returned through float* / DWORD* out-pointers and none of the entry points takes an HDC. */
+typedef BOOL (WINAPI * PFNWGLBEGINFRAMETRACKINGI3DPROC) (void);
+typedef BOOL (WINAPI * PFNWGLENDFRAMETRACKINGI3DPROC) (void);
+typedef BOOL (WINAPI * PFNWGLGETFRAMEUSAGEI3DPROC) (float* pUsage);
+typedef BOOL (WINAPI * PFNWGLQUERYFRAMETRACKINGI3DPROC) (DWORD* pFrameCount, DWORD *pMissedFrames, float *pLastMissedUsage);
+
+#define wglBeginFrameTrackingI3D WGLEW_GET_FUN(__wglewBeginFrameTrackingI3D)
+#define wglEndFrameTrackingI3D WGLEW_GET_FUN(__wglewEndFrameTrackingI3D)
+#define wglGetFrameUsageI3D WGLEW_GET_FUN(__wglewGetFrameUsageI3D)
+#define wglQueryFrameTrackingI3D WGLEW_GET_FUN(__wglewQueryFrameTrackingI3D)
+
+#define WGLEW_I3D_swap_frame_usage WGLEW_GET_VAR(__WGLEW_I3D_swap_frame_usage)
+
+#endif /* WGL_I3D_swap_frame_usage */
+
+/* --------------------------- WGL_NV_DX_interop --------------------------- */
+
+#ifndef WGL_NV_DX_interop
+#define WGL_NV_DX_interop 1 /* guard: section is processed only once */
+/* Access-mode values 0, 1, 2; they match the GLenum access parameters in the typedefs below (presumably - confirm against the extension spec). */
+#define WGL_ACCESS_READ_ONLY_NV 0x0000
+#define WGL_ACCESS_READ_WRITE_NV 0x0001
+#define WGL_ACCESS_WRITE_DISCARD_NV 0x0002
+/* Devices and registered objects are generic Win32 HANDLEs; the DirectX side is passed as untyped void*. */
+typedef BOOL (WINAPI * PFNWGLDXCLOSEDEVICENVPROC) (HANDLE hDevice);
+typedef BOOL (WINAPI * PFNWGLDXLOCKOBJECTSNVPROC) (HANDLE hDevice, GLint count, HANDLE* hObjects);
+typedef BOOL (WINAPI * PFNWGLDXOBJECTACCESSNVPROC) (HANDLE hObject, GLenum access);
+typedef HANDLE (WINAPI * PFNWGLDXOPENDEVICENVPROC) (void* dxDevice);
+typedef HANDLE (WINAPI * PFNWGLDXREGISTEROBJECTNVPROC) (HANDLE hDevice, void* dxObject, GLuint name, GLenum type, GLenum access);
+typedef BOOL (WINAPI * PFNWGLDXSETRESOURCESHAREHANDLENVPROC) (void* dxObject, HANDLE shareHandle);
+typedef BOOL (WINAPI * PFNWGLDXUNLOCKOBJECTSNVPROC) (HANDLE hDevice, GLint count, HANDLE* hObjects);
+typedef BOOL (WINAPI * PFNWGLDXUNREGISTEROBJECTNVPROC) (HANDLE hDevice, HANDLE hObject);
+
+#define wglDXCloseDeviceNV WGLEW_GET_FUN(__wglewDXCloseDeviceNV)
+#define wglDXLockObjectsNV WGLEW_GET_FUN(__wglewDXLockObjectsNV)
+#define wglDXObjectAccessNV WGLEW_GET_FUN(__wglewDXObjectAccessNV)
+#define wglDXOpenDeviceNV WGLEW_GET_FUN(__wglewDXOpenDeviceNV)
+#define wglDXRegisterObjectNV WGLEW_GET_FUN(__wglewDXRegisterObjectNV)
+#define wglDXSetResourceShareHandleNV WGLEW_GET_FUN(__wglewDXSetResourceShareHandleNV)
+#define wglDXUnlockObjectsNV WGLEW_GET_FUN(__wglewDXUnlockObjectsNV)
+#define wglDXUnregisterObjectNV WGLEW_GET_FUN(__wglewDXUnregisterObjectNV)
+
+#define WGLEW_NV_DX_interop WGLEW_GET_VAR(__WGLEW_NV_DX_interop)
+
+#endif /* WGL_NV_DX_interop */
+
+/* --------------------------- WGL_NV_DX_interop2 -------------------------- */
+
+#ifndef WGL_NV_DX_interop2
+#define WGL_NV_DX_interop2 1 /* guard: section is processed only once */
+
+#define WGLEW_NV_DX_interop2 WGLEW_GET_VAR(__WGLEW_NV_DX_interop2) /* this section adds no tokens or entry points, only the flag accessor */
+
+#endif /* WGL_NV_DX_interop2 */
+
+/* --------------------------- WGL_NV_copy_image --------------------------- */
+
+#ifndef WGL_NV_copy_image
+#define WGL_NV_copy_image 1 /* guard: section is processed only once */
+/* Single entry point: a source group (HGLRC, name, target, level, x, y, z), the same seven for the destination, then width/height/depth. */
+typedef BOOL (WINAPI * PFNWGLCOPYIMAGESUBDATANVPROC) (HGLRC hSrcRC, GLuint srcName, GLenum srcTarget, GLint srcLevel, GLint srcX, GLint srcY, GLint srcZ, HGLRC hDstRC, GLuint dstName, GLenum dstTarget, GLint dstLevel, GLint dstX, GLint dstY, GLint dstZ, GLsizei width, GLsizei height, GLsizei depth);
+
+#define wglCopyImageSubDataNV WGLEW_GET_FUN(__wglewCopyImageSubDataNV)
+
+#define WGLEW_NV_copy_image WGLEW_GET_VAR(__WGLEW_NV_copy_image)
+
+#endif /* WGL_NV_copy_image */
+
+/* ------------------------ WGL_NV_delay_before_swap ----------------------- */
+
+#ifndef WGL_NV_delay_before_swap
+#define WGL_NV_delay_before_swap 1 /* guard: section is processed only once */
+
+typedef BOOL (WINAPI * PFNWGLDELAYBEFORESWAPNVPROC) (HDC hDC, GLfloat seconds); /* the delay argument is a GLfloat named seconds */
+
+#define wglDelayBeforeSwapNV WGLEW_GET_FUN(__wglewDelayBeforeSwapNV)
+
+#define WGLEW_NV_delay_before_swap WGLEW_GET_VAR(__WGLEW_NV_delay_before_swap)
+
+#endif /* WGL_NV_delay_before_swap */
+
+/* -------------------------- WGL_NV_float_buffer -------------------------- */
+
+#ifndef WGL_NV_float_buffer
+#define WGL_NV_float_buffer 1 /* guard: section is processed only once */
+/* Tokens only (contiguous 0x20B0-0x20B8); this section declares no entry points. */
+#define WGL_FLOAT_COMPONENTS_NV 0x20B0
+#define WGL_BIND_TO_TEXTURE_RECTANGLE_FLOAT_R_NV 0x20B1
+#define WGL_BIND_TO_TEXTURE_RECTANGLE_FLOAT_RG_NV 0x20B2
+#define WGL_BIND_TO_TEXTURE_RECTANGLE_FLOAT_RGB_NV 0x20B3
+#define WGL_BIND_TO_TEXTURE_RECTANGLE_FLOAT_RGBA_NV 0x20B4
+#define WGL_TEXTURE_FLOAT_R_NV 0x20B5
+#define WGL_TEXTURE_FLOAT_RG_NV 0x20B6
+#define WGL_TEXTURE_FLOAT_RGB_NV 0x20B7
+#define WGL_TEXTURE_FLOAT_RGBA_NV 0x20B8
+
+#define WGLEW_NV_float_buffer WGLEW_GET_VAR(__WGLEW_NV_float_buffer)
+
+#endif /* WGL_NV_float_buffer */
+
+/* -------------------------- WGL_NV_gpu_affinity -------------------------- */
+
+#ifndef WGL_NV_gpu_affinity
+#define WGL_NV_gpu_affinity 1 /* guard: section is processed only once */
+
+#define WGL_ERROR_INCOMPATIBLE_AFFINITY_MASKS_NV 0x20D0
+#define WGL_ERROR_MISSING_AFFINITY_MASK_NV 0x20D1
+/* NOTE(review): the HGPUNV handle declaration on the next line is commented out, yet HGPUNV is still used by four typedefs below. Some header included before this point must declare HGPUNV or this section will not compile - confirm which one and record it here. */
+//DECLARE_HANDLE(HGPUNV);
+typedef struct _GPU_DEVICE { /* record passed by pointer (PGPU_DEVICE) to PFNWGLENUMGPUDEVICESNVPROC */
+  DWORD cb; /* presumably the structure size in bytes - TODO confirm against the extension spec */
+  CHAR DeviceName[32]; /* fixed 32-CHAR buffer */
+  CHAR DeviceString[128]; /* fixed 128-CHAR buffer */
+  DWORD Flags; 
+  RECT rcVirtualScreen; 
+} GPU_DEVICE, *PGPU_DEVICE;
+
+typedef HDC (WINAPI * PFNWGLCREATEAFFINITYDCNVPROC) (const HGPUNV *phGpuList);
+typedef BOOL (WINAPI * PFNWGLDELETEDCNVPROC) (HDC hdc);
+typedef BOOL (WINAPI * PFNWGLENUMGPUDEVICESNVPROC) (HGPUNV hGpu, UINT iDeviceIndex, PGPU_DEVICE lpGpuDevice);
+typedef BOOL (WINAPI * PFNWGLENUMGPUSFROMAFFINITYDCNVPROC) (HDC hAffinityDC, UINT iGpuIndex, HGPUNV *hGpu);
+typedef BOOL (WINAPI * PFNWGLENUMGPUSNVPROC) (UINT iGpuIndex, HGPUNV *phGpu);
+
+#define wglCreateAffinityDCNV WGLEW_GET_FUN(__wglewCreateAffinityDCNV)
+#define wglDeleteDCNV WGLEW_GET_FUN(__wglewDeleteDCNV)
+#define wglEnumGpuDevicesNV WGLEW_GET_FUN(__wglewEnumGpuDevicesNV)
+#define wglEnumGpusFromAffinityDCNV WGLEW_GET_FUN(__wglewEnumGpusFromAffinityDCNV)
+#define wglEnumGpusNV WGLEW_GET_FUN(__wglewEnumGpusNV)
+
+#define WGLEW_NV_gpu_affinity WGLEW_GET_VAR(__WGLEW_NV_gpu_affinity)
+
+#endif /* WGL_NV_gpu_affinity */
+
+/* ---------------------- WGL_NV_multisample_coverage ---------------------- */
+
+#ifndef WGL_NV_multisample_coverage
+#define WGL_NV_multisample_coverage 1 /* guard: section is processed only once */
+
+#define WGL_COVERAGE_SAMPLES_NV 0x2042 /* same numeric value as WGL_SAMPLES_EXT in the WGL_EXT_multisample section */
+#define WGL_COLOR_SAMPLES_NV 0x20B9
+
+#define WGLEW_NV_multisample_coverage WGLEW_GET_VAR(__WGLEW_NV_multisample_coverage)
+
+#endif /* WGL_NV_multisample_coverage */
+
+/* -------------------------- WGL_NV_present_video ------------------------- */
+
+#ifndef WGL_NV_present_video
+#define WGL_NV_present_video 1 /* guard: section is processed only once */
+
+#define WGL_NUM_VIDEO_SLOTS_NV 0x20F0
+
+DECLARE_HANDLE(HVIDEOOUTPUTDEVICENV); /* handle type used by the bind and enumerate typedefs below */
+
+typedef BOOL (WINAPI * PFNWGLBINDVIDEODEVICENVPROC) (HDC hDc, unsigned int uVideoSlot, HVIDEOOUTPUTDEVICENV hVideoDevice, const int* piAttribList);
+typedef int (WINAPI * PFNWGLENUMERATEVIDEODEVICESNVPROC) (HDC hDc, HVIDEOOUTPUTDEVICENV* phDeviceList); /* returns int, not BOOL */
+typedef BOOL (WINAPI * PFNWGLQUERYCURRENTCONTEXTNVPROC) (int iAttribute, int* piValue);
+
+#define wglBindVideoDeviceNV WGLEW_GET_FUN(__wglewBindVideoDeviceNV)
+#define wglEnumerateVideoDevicesNV WGLEW_GET_FUN(__wglewEnumerateVideoDevicesNV)
+#define wglQueryCurrentContextNV WGLEW_GET_FUN(__wglewQueryCurrentContextNV)
+
+#define WGLEW_NV_present_video WGLEW_GET_VAR(__WGLEW_NV_present_video)
+
+#endif /* WGL_NV_present_video */
+
+/* ---------------------- WGL_NV_render_depth_texture ---------------------- */
+
+#ifndef WGL_NV_render_depth_texture
+#define WGL_NV_render_depth_texture 1 /* guard: section is processed only once */
+
+#define WGL_NO_TEXTURE_ARB 0x2077 /* identical re-definition of the token from the WGL_ARB_render_texture section; must stay 0x2077 to remain a benign redefinition */
+#define WGL_BIND_TO_TEXTURE_DEPTH_NV 0x20A3
+#define WGL_BIND_TO_TEXTURE_RECTANGLE_DEPTH_NV 0x20A4
+#define WGL_DEPTH_TEXTURE_FORMAT_NV 0x20A5
+#define WGL_TEXTURE_DEPTH_COMPONENT_NV 0x20A6
+#define WGL_DEPTH_COMPONENT_NV 0x20A7
+
+#define WGLEW_NV_render_depth_texture WGLEW_GET_VAR(__WGLEW_NV_render_depth_texture)
+
+#endif /* WGL_NV_render_depth_texture */
+
+/* -------------------- WGL_NV_render_texture_rectangle -------------------- */
+
+#ifndef WGL_NV_render_texture_rectangle
+#define WGL_NV_render_texture_rectangle 1 /* guard: section is processed only once */
+/* Tokens only (0x20A0-0x20A2); this section declares no entry points. */
+#define WGL_BIND_TO_TEXTURE_RECTANGLE_RGB_NV 0x20A0
+#define WGL_BIND_TO_TEXTURE_RECTANGLE_RGBA_NV 0x20A1
+#define WGL_TEXTURE_RECTANGLE_NV 0x20A2
+
+#define WGLEW_NV_render_texture_rectangle WGLEW_GET_VAR(__WGLEW_NV_render_texture_rectangle)
+
+#endif /* WGL_NV_render_texture_rectangle */
+
+/* --------------------------- WGL_NV_swap_group --------------------------- */
+
+#ifndef WGL_NV_swap_group
+#define WGL_NV_swap_group 1 /* guard: section is processed only once */
+/* No tokens. Groups and barriers are GLuint values; BindSwapBarrier is the only entry point here without an HDC. */
+typedef BOOL (WINAPI * PFNWGLBINDSWAPBARRIERNVPROC) (GLuint group, GLuint barrier);
+typedef BOOL (WINAPI * PFNWGLJOINSWAPGROUPNVPROC) (HDC hDC, GLuint group);
+typedef BOOL (WINAPI * PFNWGLQUERYFRAMECOUNTNVPROC) (HDC hDC, GLuint* count);
+typedef BOOL (WINAPI * PFNWGLQUERYMAXSWAPGROUPSNVPROC) (HDC hDC, GLuint* maxGroups, GLuint *maxBarriers);
+typedef BOOL (WINAPI * PFNWGLQUERYSWAPGROUPNVPROC) (HDC hDC, GLuint* group, GLuint *barrier);
+typedef BOOL (WINAPI * PFNWGLRESETFRAMECOUNTNVPROC) (HDC hDC);
+
+#define wglBindSwapBarrierNV WGLEW_GET_FUN(__wglewBindSwapBarrierNV)
+#define wglJoinSwapGroupNV WGLEW_GET_FUN(__wglewJoinSwapGroupNV)
+#define wglQueryFrameCountNV WGLEW_GET_FUN(__wglewQueryFrameCountNV)
+#define wglQueryMaxSwapGroupsNV WGLEW_GET_FUN(__wglewQueryMaxSwapGroupsNV)
+#define wglQuerySwapGroupNV WGLEW_GET_FUN(__wglewQuerySwapGroupNV)
+#define wglResetFrameCountNV WGLEW_GET_FUN(__wglewResetFrameCountNV)
+
+#define WGLEW_NV_swap_group WGLEW_GET_VAR(__WGLEW_NV_swap_group)
+
+#endif /* WGL_NV_swap_group */
+
+/* ----------------------- WGL_NV_vertex_array_range ----------------------- */
+
+#ifndef WGL_NV_vertex_array_range
+#define WGL_NV_vertex_array_range 1 /* guard: section is processed only once */
+/* An allocate/free pair working on raw void* memory; the allocator takes a size plus three GLfloat arguments (readFrequency, writeFrequency, priority). */
+typedef void * (WINAPI * PFNWGLALLOCATEMEMORYNVPROC) (GLsizei size, GLfloat readFrequency, GLfloat writeFrequency, GLfloat priority);
+typedef void (WINAPI * PFNWGLFREEMEMORYNVPROC) (void *pointer);
+
+#define wglAllocateMemoryNV WGLEW_GET_FUN(__wglewAllocateMemoryNV)
+#define wglFreeMemoryNV WGLEW_GET_FUN(__wglewFreeMemoryNV)
+
+#define WGLEW_NV_vertex_array_range WGLEW_GET_VAR(__WGLEW_NV_vertex_array_range)
+
+#endif /* WGL_NV_vertex_array_range */
+
+/* -------------------------- WGL_NV_video_capture ------------------------- */
+
+#ifndef WGL_NV_video_capture
+#define WGL_NV_video_capture 1 /* guard: section is processed only once */
+
+#define WGL_UNIQUE_ID_NV 0x20CE
+#define WGL_NUM_VIDEO_CAPTURE_SLOTS_NV 0x20CF
+
+DECLARE_HANDLE(HVIDEOINPUTDEVICENV); /* handle type used by every typedef in this section */
+
+typedef BOOL (WINAPI * PFNWGLBINDVIDEOCAPTUREDEVICENVPROC) (UINT uVideoSlot, HVIDEOINPUTDEVICENV hDevice); /* the only entry point here without an HDC */
+typedef UINT (WINAPI * PFNWGLENUMERATEVIDEOCAPTUREDEVICESNVPROC) (HDC hDc, HVIDEOINPUTDEVICENV* phDeviceList); /* returns UINT, not BOOL */
+typedef BOOL (WINAPI * PFNWGLLOCKVIDEOCAPTUREDEVICENVPROC) (HDC hDc, HVIDEOINPUTDEVICENV hDevice);
+typedef BOOL (WINAPI * PFNWGLQUERYVIDEOCAPTUREDEVICENVPROC) (HDC hDc, HVIDEOINPUTDEVICENV hDevice, int iAttribute, int* piValue);
+typedef BOOL (WINAPI * PFNWGLRELEASEVIDEOCAPTUREDEVICENVPROC) (HDC hDc, HVIDEOINPUTDEVICENV hDevice);
+
+#define wglBindVideoCaptureDeviceNV WGLEW_GET_FUN(__wglewBindVideoCaptureDeviceNV)
+#define wglEnumerateVideoCaptureDevicesNV WGLEW_GET_FUN(__wglewEnumerateVideoCaptureDevicesNV)
+#define wglLockVideoCaptureDeviceNV WGLEW_GET_FUN(__wglewLockVideoCaptureDeviceNV)
+#define wglQueryVideoCaptureDeviceNV WGLEW_GET_FUN(__wglewQueryVideoCaptureDeviceNV)
+#define wglReleaseVideoCaptureDeviceNV WGLEW_GET_FUN(__wglewReleaseVideoCaptureDeviceNV)
+
+#define WGLEW_NV_video_capture WGLEW_GET_VAR(__WGLEW_NV_video_capture)
+
+#endif /* WGL_NV_video_capture */
+
+/* -------------------------- WGL_NV_video_output -------------------------- */
+
+#ifndef WGL_NV_video_output
+#define WGL_NV_video_output 1
+/* Enumerant values introduced by this extension (contiguous range 0x20C0-0x20CC). */
+#define WGL_BIND_TO_VIDEO_RGB_NV 0x20C0
+#define WGL_BIND_TO_VIDEO_RGBA_NV 0x20C1
+#define WGL_BIND_TO_VIDEO_RGB_AND_DEPTH_NV 0x20C2
+#define WGL_VIDEO_OUT_COLOR_NV 0x20C3
+#define WGL_VIDEO_OUT_ALPHA_NV 0x20C4
+#define WGL_VIDEO_OUT_DEPTH_NV 0x20C5
+#define WGL_VIDEO_OUT_COLOR_AND_ALPHA_NV 0x20C6
+#define WGL_VIDEO_OUT_COLOR_AND_DEPTH_NV 0x20C7
+#define WGL_VIDEO_OUT_FRAME 0x20C8
+#define WGL_VIDEO_OUT_FIELD_1 0x20C9
+#define WGL_VIDEO_OUT_FIELD_2 0x20CA
+#define WGL_VIDEO_OUT_STACKED_FIELDS_1_2 0x20CB
+#define WGL_VIDEO_OUT_STACKED_FIELDS_2_1 0x20CC
+/* Handle type taken by the entry points below. */
+DECLARE_HANDLE(HPVIDEODEV);
+/* Function-pointer types, one per entry point; HPBUFFERARB is declared elsewhere in this header. */
+typedef BOOL (WINAPI * PFNWGLBINDVIDEOIMAGENVPROC) (HPVIDEODEV hVideoDevice, HPBUFFERARB hPbuffer, int iVideoBuffer);
+typedef BOOL (WINAPI * PFNWGLGETVIDEODEVICENVPROC) (HDC hDC, int numDevices, HPVIDEODEV* hVideoDevice);
+typedef BOOL (WINAPI * PFNWGLGETVIDEOINFONVPROC) (HPVIDEODEV hpVideoDevice, unsigned long* pulCounterOutputPbuffer, unsigned long *pulCounterOutputVideo);
+typedef BOOL (WINAPI * PFNWGLRELEASEVIDEODEVICENVPROC) (HPVIDEODEV hVideoDevice);
+typedef BOOL (WINAPI * PFNWGLRELEASEVIDEOIMAGENVPROC) (HPBUFFERARB hPbuffer, int iVideoBuffer);
+typedef BOOL (WINAPI * PFNWGLSENDPBUFFERTOVIDEONVPROC) (HPBUFFERARB hPbuffer, int iBufferType, unsigned long* pulCounterPbuffer, BOOL bBlock);
+/* Public names, resolved through WGLEW_GET_FUN to the matching __wglew* pointer. */
+#define wglBindVideoImageNV WGLEW_GET_FUN(__wglewBindVideoImageNV)
+#define wglGetVideoDeviceNV WGLEW_GET_FUN(__wglewGetVideoDeviceNV)
+#define wglGetVideoInfoNV WGLEW_GET_FUN(__wglewGetVideoInfoNV)
+#define wglReleaseVideoDeviceNV WGLEW_GET_FUN(__wglewReleaseVideoDeviceNV)
+#define wglReleaseVideoImageNV WGLEW_GET_FUN(__wglewReleaseVideoImageNV)
+#define wglSendPbufferToVideoNV WGLEW_GET_FUN(__wglewSendPbufferToVideoNV)
+/* GLboolean flag for this extension, read through WGLEW_GET_VAR. */
+#define WGLEW_NV_video_output WGLEW_GET_VAR(__WGLEW_NV_video_output)
+
+#endif /* WGL_NV_video_output */
+
+/* -------------------------- WGL_OML_sync_control ------------------------- */
+
+#ifndef WGL_OML_sync_control
+#define WGL_OML_sync_control 1
+/* Function-pointer types, one per entry point; counters are passed as INT64 (ust/msc/sbc). */
+typedef BOOL (WINAPI * PFNWGLGETMSCRATEOMLPROC) (HDC hdc, INT32* numerator, INT32 *denominator);
+typedef BOOL (WINAPI * PFNWGLGETSYNCVALUESOMLPROC) (HDC hdc, INT64* ust, INT64 *msc, INT64 *sbc);
+typedef INT64 (WINAPI * PFNWGLSWAPBUFFERSMSCOMLPROC) (HDC hdc, INT64 target_msc, INT64 divisor, INT64 remainder);
+typedef INT64 (WINAPI * PFNWGLSWAPLAYERBUFFERSMSCOMLPROC) (HDC hdc, INT fuPlanes, INT64 target_msc, INT64 divisor, INT64 remainder);
+typedef BOOL (WINAPI * PFNWGLWAITFORMSCOMLPROC) (HDC hdc, INT64 target_msc, INT64 divisor, INT64 remainder, INT64* ust, INT64 *msc, INT64 *sbc);
+typedef BOOL (WINAPI * PFNWGLWAITFORSBCOMLPROC) (HDC hdc, INT64 target_sbc, INT64* ust, INT64 *msc, INT64 *sbc);
+/* Public names, resolved through WGLEW_GET_FUN to the matching __wglew* pointer. */
+#define wglGetMscRateOML WGLEW_GET_FUN(__wglewGetMscRateOML)
+#define wglGetSyncValuesOML WGLEW_GET_FUN(__wglewGetSyncValuesOML)
+#define wglSwapBuffersMscOML WGLEW_GET_FUN(__wglewSwapBuffersMscOML)
+#define wglSwapLayerBuffersMscOML WGLEW_GET_FUN(__wglewSwapLayerBuffersMscOML)
+#define wglWaitForMscOML WGLEW_GET_FUN(__wglewWaitForMscOML)
+#define wglWaitForSbcOML WGLEW_GET_FUN(__wglewWaitForSbcOML)
+/* GLboolean flag for this extension, read through WGLEW_GET_VAR. */
+#define WGLEW_OML_sync_control WGLEW_GET_VAR(__WGLEW_OML_sync_control)
+
+#endif /* WGL_OML_sync_control */
+
+/* ------------------------------------------------------------------------- */
+
+#ifdef GLEW_MX
+#define WGLEW_FUN_EXPORT
+#define WGLEW_VAR_EXPORT
+#else
+#define WGLEW_FUN_EXPORT GLEW_FUN_EXPORT
+#define WGLEW_VAR_EXPORT GLEW_VAR_EXPORT
+#endif /* GLEW_MX */
+/* Under GLEW_MX the declarations below are struct members, so the export qualifiers above are defined empty. */
+#ifdef GLEW_MX
+struct WGLEWContextStruct
+{
+#endif /* GLEW_MX */
+/* Function-pointer slots come first, grouped by extension; the GLboolean extension flags follow them. */
+WGLEW_FUN_EXPORT PFNWGLSETSTEREOEMITTERSTATE3DLPROC __wglewSetStereoEmitterState3DL;
+
+WGLEW_FUN_EXPORT PFNWGLBLITCONTEXTFRAMEBUFFERAMDPROC __wglewBlitContextFramebufferAMD;
+WGLEW_FUN_EXPORT PFNWGLCREATEASSOCIATEDCONTEXTAMDPROC __wglewCreateAssociatedContextAMD;
+WGLEW_FUN_EXPORT PFNWGLCREATEASSOCIATEDCONTEXTATTRIBSAMDPROC __wglewCreateAssociatedContextAttribsAMD;
+WGLEW_FUN_EXPORT PFNWGLDELETEASSOCIATEDCONTEXTAMDPROC __wglewDeleteAssociatedContextAMD;
+WGLEW_FUN_EXPORT PFNWGLGETCONTEXTGPUIDAMDPROC __wglewGetContextGPUIDAMD;
+WGLEW_FUN_EXPORT PFNWGLGETCURRENTASSOCIATEDCONTEXTAMDPROC __wglewGetCurrentAssociatedContextAMD;
+WGLEW_FUN_EXPORT PFNWGLGETGPUIDSAMDPROC __wglewGetGPUIDsAMD;
+WGLEW_FUN_EXPORT PFNWGLGETGPUINFOAMDPROC __wglewGetGPUInfoAMD;
+WGLEW_FUN_EXPORT PFNWGLMAKEASSOCIATEDCONTEXTCURRENTAMDPROC __wglewMakeAssociatedContextCurrentAMD;
+
+WGLEW_FUN_EXPORT PFNWGLCREATEBUFFERREGIONARBPROC __wglewCreateBufferRegionARB;
+WGLEW_FUN_EXPORT PFNWGLDELETEBUFFERREGIONARBPROC __wglewDeleteBufferRegionARB;
+WGLEW_FUN_EXPORT PFNWGLRESTOREBUFFERREGIONARBPROC __wglewRestoreBufferRegionARB;
+WGLEW_FUN_EXPORT PFNWGLSAVEBUFFERREGIONARBPROC __wglewSaveBufferRegionARB;
+
+WGLEW_FUN_EXPORT PFNWGLCREATECONTEXTATTRIBSARBPROC __wglewCreateContextAttribsARB;
+
+WGLEW_FUN_EXPORT PFNWGLGETEXTENSIONSSTRINGARBPROC __wglewGetExtensionsStringARB;
+
+WGLEW_FUN_EXPORT PFNWGLGETCURRENTREADDCARBPROC __wglewGetCurrentReadDCARB;
+WGLEW_FUN_EXPORT PFNWGLMAKECONTEXTCURRENTARBPROC __wglewMakeContextCurrentARB;
+
+WGLEW_FUN_EXPORT PFNWGLCREATEPBUFFERARBPROC __wglewCreatePbufferARB;
+WGLEW_FUN_EXPORT PFNWGLDESTROYPBUFFERARBPROC __wglewDestroyPbufferARB;
+WGLEW_FUN_EXPORT PFNWGLGETPBUFFERDCARBPROC __wglewGetPbufferDCARB;
+WGLEW_FUN_EXPORT PFNWGLQUERYPBUFFERARBPROC __wglewQueryPbufferARB;
+WGLEW_FUN_EXPORT PFNWGLRELEASEPBUFFERDCARBPROC __wglewReleasePbufferDCARB;
+
+WGLEW_FUN_EXPORT PFNWGLCHOOSEPIXELFORMATARBPROC __wglewChoosePixelFormatARB;
+WGLEW_FUN_EXPORT PFNWGLGETPIXELFORMATATTRIBFVARBPROC __wglewGetPixelFormatAttribfvARB;
+WGLEW_FUN_EXPORT PFNWGLGETPIXELFORMATATTRIBIVARBPROC __wglewGetPixelFormatAttribivARB;
+
+WGLEW_FUN_EXPORT PFNWGLBINDTEXIMAGEARBPROC __wglewBindTexImageARB;
+WGLEW_FUN_EXPORT PFNWGLRELEASETEXIMAGEARBPROC __wglewReleaseTexImageARB;
+WGLEW_FUN_EXPORT PFNWGLSETPBUFFERATTRIBARBPROC __wglewSetPbufferAttribARB;
+
+WGLEW_FUN_EXPORT PFNWGLBINDDISPLAYCOLORTABLEEXTPROC __wglewBindDisplayColorTableEXT;
+WGLEW_FUN_EXPORT PFNWGLCREATEDISPLAYCOLORTABLEEXTPROC __wglewCreateDisplayColorTableEXT;
+WGLEW_FUN_EXPORT PFNWGLDESTROYDISPLAYCOLORTABLEEXTPROC __wglewDestroyDisplayColorTableEXT;
+WGLEW_FUN_EXPORT PFNWGLLOADDISPLAYCOLORTABLEEXTPROC __wglewLoadDisplayColorTableEXT;
+
+WGLEW_FUN_EXPORT PFNWGLGETEXTENSIONSSTRINGEXTPROC __wglewGetExtensionsStringEXT;
+
+WGLEW_FUN_EXPORT PFNWGLGETCURRENTREADDCEXTPROC __wglewGetCurrentReadDCEXT;
+WGLEW_FUN_EXPORT PFNWGLMAKECONTEXTCURRENTEXTPROC __wglewMakeContextCurrentEXT;
+
+WGLEW_FUN_EXPORT PFNWGLCREATEPBUFFEREXTPROC __wglewCreatePbufferEXT;
+WGLEW_FUN_EXPORT PFNWGLDESTROYPBUFFEREXTPROC __wglewDestroyPbufferEXT;
+WGLEW_FUN_EXPORT PFNWGLGETPBUFFERDCEXTPROC __wglewGetPbufferDCEXT;
+WGLEW_FUN_EXPORT PFNWGLQUERYPBUFFEREXTPROC __wglewQueryPbufferEXT;
+WGLEW_FUN_EXPORT PFNWGLRELEASEPBUFFERDCEXTPROC __wglewReleasePbufferDCEXT;
+
+WGLEW_FUN_EXPORT PFNWGLCHOOSEPIXELFORMATEXTPROC __wglewChoosePixelFormatEXT;
+WGLEW_FUN_EXPORT PFNWGLGETPIXELFORMATATTRIBFVEXTPROC __wglewGetPixelFormatAttribfvEXT;
+WGLEW_FUN_EXPORT PFNWGLGETPIXELFORMATATTRIBIVEXTPROC __wglewGetPixelFormatAttribivEXT;
+
+WGLEW_FUN_EXPORT PFNWGLGETSWAPINTERVALEXTPROC __wglewGetSwapIntervalEXT;
+WGLEW_FUN_EXPORT PFNWGLSWAPINTERVALEXTPROC __wglewSwapIntervalEXT;
+
+WGLEW_FUN_EXPORT PFNWGLGETDIGITALVIDEOPARAMETERSI3DPROC __wglewGetDigitalVideoParametersI3D;
+WGLEW_FUN_EXPORT PFNWGLSETDIGITALVIDEOPARAMETERSI3DPROC __wglewSetDigitalVideoParametersI3D;
+
+WGLEW_FUN_EXPORT PFNWGLGETGAMMATABLEI3DPROC __wglewGetGammaTableI3D;
+WGLEW_FUN_EXPORT PFNWGLGETGAMMATABLEPARAMETERSI3DPROC __wglewGetGammaTableParametersI3D;
+WGLEW_FUN_EXPORT PFNWGLSETGAMMATABLEI3DPROC __wglewSetGammaTableI3D;
+WGLEW_FUN_EXPORT PFNWGLSETGAMMATABLEPARAMETERSI3DPROC __wglewSetGammaTableParametersI3D;
+
+WGLEW_FUN_EXPORT PFNWGLDISABLEGENLOCKI3DPROC __wglewDisableGenlockI3D;
+WGLEW_FUN_EXPORT PFNWGLENABLEGENLOCKI3DPROC __wglewEnableGenlockI3D;
+WGLEW_FUN_EXPORT PFNWGLGENLOCKSAMPLERATEI3DPROC __wglewGenlockSampleRateI3D;
+WGLEW_FUN_EXPORT PFNWGLGENLOCKSOURCEDELAYI3DPROC __wglewGenlockSourceDelayI3D;
+WGLEW_FUN_EXPORT PFNWGLGENLOCKSOURCEEDGEI3DPROC __wglewGenlockSourceEdgeI3D;
+WGLEW_FUN_EXPORT PFNWGLGENLOCKSOURCEI3DPROC __wglewGenlockSourceI3D;
+WGLEW_FUN_EXPORT PFNWGLGETGENLOCKSAMPLERATEI3DPROC __wglewGetGenlockSampleRateI3D;
+WGLEW_FUN_EXPORT PFNWGLGETGENLOCKSOURCEDELAYI3DPROC __wglewGetGenlockSourceDelayI3D;
+WGLEW_FUN_EXPORT PFNWGLGETGENLOCKSOURCEEDGEI3DPROC __wglewGetGenlockSourceEdgeI3D;
+WGLEW_FUN_EXPORT PFNWGLGETGENLOCKSOURCEI3DPROC __wglewGetGenlockSourceI3D;
+WGLEW_FUN_EXPORT PFNWGLISENABLEDGENLOCKI3DPROC __wglewIsEnabledGenlockI3D;
+WGLEW_FUN_EXPORT PFNWGLQUERYGENLOCKMAXSOURCEDELAYI3DPROC __wglewQueryGenlockMaxSourceDelayI3D;
+
+WGLEW_FUN_EXPORT PFNWGLASSOCIATEIMAGEBUFFEREVENTSI3DPROC __wglewAssociateImageBufferEventsI3D;
+WGLEW_FUN_EXPORT PFNWGLCREATEIMAGEBUFFERI3DPROC __wglewCreateImageBufferI3D;
+WGLEW_FUN_EXPORT PFNWGLDESTROYIMAGEBUFFERI3DPROC __wglewDestroyImageBufferI3D;
+WGLEW_FUN_EXPORT PFNWGLRELEASEIMAGEBUFFEREVENTSI3DPROC __wglewReleaseImageBufferEventsI3D;
+
+WGLEW_FUN_EXPORT PFNWGLDISABLEFRAMELOCKI3DPROC __wglewDisableFrameLockI3D;
+WGLEW_FUN_EXPORT PFNWGLENABLEFRAMELOCKI3DPROC __wglewEnableFrameLockI3D;
+WGLEW_FUN_EXPORT PFNWGLISENABLEDFRAMELOCKI3DPROC __wglewIsEnabledFrameLockI3D;
+WGLEW_FUN_EXPORT PFNWGLQUERYFRAMELOCKMASTERI3DPROC __wglewQueryFrameLockMasterI3D;
+
+WGLEW_FUN_EXPORT PFNWGLBEGINFRAMETRACKINGI3DPROC __wglewBeginFrameTrackingI3D;
+WGLEW_FUN_EXPORT PFNWGLENDFRAMETRACKINGI3DPROC __wglewEndFrameTrackingI3D;
+WGLEW_FUN_EXPORT PFNWGLGETFRAMEUSAGEI3DPROC __wglewGetFrameUsageI3D;
+WGLEW_FUN_EXPORT PFNWGLQUERYFRAMETRACKINGI3DPROC __wglewQueryFrameTrackingI3D;
+
+WGLEW_FUN_EXPORT PFNWGLDXCLOSEDEVICENVPROC __wglewDXCloseDeviceNV;
+WGLEW_FUN_EXPORT PFNWGLDXLOCKOBJECTSNVPROC __wglewDXLockObjectsNV;
+WGLEW_FUN_EXPORT PFNWGLDXOBJECTACCESSNVPROC __wglewDXObjectAccessNV;
+WGLEW_FUN_EXPORT PFNWGLDXOPENDEVICENVPROC __wglewDXOpenDeviceNV;
+WGLEW_FUN_EXPORT PFNWGLDXREGISTEROBJECTNVPROC __wglewDXRegisterObjectNV;
+WGLEW_FUN_EXPORT PFNWGLDXSETRESOURCESHAREHANDLENVPROC __wglewDXSetResourceShareHandleNV;
+WGLEW_FUN_EXPORT PFNWGLDXUNLOCKOBJECTSNVPROC __wglewDXUnlockObjectsNV;
+WGLEW_FUN_EXPORT PFNWGLDXUNREGISTEROBJECTNVPROC __wglewDXUnregisterObjectNV;
+
+WGLEW_FUN_EXPORT PFNWGLCOPYIMAGESUBDATANVPROC __wglewCopyImageSubDataNV;
+
+WGLEW_FUN_EXPORT PFNWGLDELAYBEFORESWAPNVPROC __wglewDelayBeforeSwapNV;
+
+WGLEW_FUN_EXPORT PFNWGLCREATEAFFINITYDCNVPROC __wglewCreateAffinityDCNV;
+WGLEW_FUN_EXPORT PFNWGLDELETEDCNVPROC __wglewDeleteDCNV;
+WGLEW_FUN_EXPORT PFNWGLENUMGPUDEVICESNVPROC __wglewEnumGpuDevicesNV;
+WGLEW_FUN_EXPORT PFNWGLENUMGPUSFROMAFFINITYDCNVPROC __wglewEnumGpusFromAffinityDCNV;
+WGLEW_FUN_EXPORT PFNWGLENUMGPUSNVPROC __wglewEnumGpusNV;
+
+WGLEW_FUN_EXPORT PFNWGLBINDVIDEODEVICENVPROC __wglewBindVideoDeviceNV;
+WGLEW_FUN_EXPORT PFNWGLENUMERATEVIDEODEVICESNVPROC __wglewEnumerateVideoDevicesNV;
+WGLEW_FUN_EXPORT PFNWGLQUERYCURRENTCONTEXTNVPROC __wglewQueryCurrentContextNV;
+
+WGLEW_FUN_EXPORT PFNWGLBINDSWAPBARRIERNVPROC __wglewBindSwapBarrierNV;
+WGLEW_FUN_EXPORT PFNWGLJOINSWAPGROUPNVPROC __wglewJoinSwapGroupNV;
+WGLEW_FUN_EXPORT PFNWGLQUERYFRAMECOUNTNVPROC __wglewQueryFrameCountNV;
+WGLEW_FUN_EXPORT PFNWGLQUERYMAXSWAPGROUPSNVPROC __wglewQueryMaxSwapGroupsNV;
+WGLEW_FUN_EXPORT PFNWGLQUERYSWAPGROUPNVPROC __wglewQuerySwapGroupNV;
+WGLEW_FUN_EXPORT PFNWGLRESETFRAMECOUNTNVPROC __wglewResetFrameCountNV;
+
+WGLEW_FUN_EXPORT PFNWGLALLOCATEMEMORYNVPROC __wglewAllocateMemoryNV;
+WGLEW_FUN_EXPORT PFNWGLFREEMEMORYNVPROC __wglewFreeMemoryNV;
+
+WGLEW_FUN_EXPORT PFNWGLBINDVIDEOCAPTUREDEVICENVPROC __wglewBindVideoCaptureDeviceNV;
+WGLEW_FUN_EXPORT PFNWGLENUMERATEVIDEOCAPTUREDEVICESNVPROC __wglewEnumerateVideoCaptureDevicesNV;
+WGLEW_FUN_EXPORT PFNWGLLOCKVIDEOCAPTUREDEVICENVPROC __wglewLockVideoCaptureDeviceNV;
+WGLEW_FUN_EXPORT PFNWGLQUERYVIDEOCAPTUREDEVICENVPROC __wglewQueryVideoCaptureDeviceNV;
+WGLEW_FUN_EXPORT PFNWGLRELEASEVIDEOCAPTUREDEVICENVPROC __wglewReleaseVideoCaptureDeviceNV;
+
+WGLEW_FUN_EXPORT PFNWGLBINDVIDEOIMAGENVPROC __wglewBindVideoImageNV;
+WGLEW_FUN_EXPORT PFNWGLGETVIDEODEVICENVPROC __wglewGetVideoDeviceNV;
+WGLEW_FUN_EXPORT PFNWGLGETVIDEOINFONVPROC __wglewGetVideoInfoNV;
+WGLEW_FUN_EXPORT PFNWGLRELEASEVIDEODEVICENVPROC __wglewReleaseVideoDeviceNV;
+WGLEW_FUN_EXPORT PFNWGLRELEASEVIDEOIMAGENVPROC __wglewReleaseVideoImageNV;
+WGLEW_FUN_EXPORT PFNWGLSENDPBUFFERTOVIDEONVPROC __wglewSendPbufferToVideoNV;
+
+WGLEW_FUN_EXPORT PFNWGLGETMSCRATEOMLPROC __wglewGetMscRateOML;
+WGLEW_FUN_EXPORT PFNWGLGETSYNCVALUESOMLPROC __wglewGetSyncValuesOML;
+WGLEW_FUN_EXPORT PFNWGLSWAPBUFFERSMSCOMLPROC __wglewSwapBuffersMscOML;
+WGLEW_FUN_EXPORT PFNWGLSWAPLAYERBUFFERSMSCOMLPROC __wglewSwapLayerBuffersMscOML;
+WGLEW_FUN_EXPORT PFNWGLWAITFORMSCOMLPROC __wglewWaitForMscOML;
+WGLEW_FUN_EXPORT PFNWGLWAITFORSBCOMLPROC __wglewWaitForSbcOML;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_3DFX_multisample;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_3DL_stereo_control;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_AMD_gpu_association;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_buffer_region;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_context_flush_control;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_create_context;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_create_context_profile;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_create_context_robustness;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_extensions_string;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_framebuffer_sRGB;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_make_current_read;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_multisample;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_pbuffer;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_pixel_format;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_pixel_format_float;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_render_texture;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_robustness_application_isolation;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ARB_robustness_share_group_isolation;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ATI_pixel_format_float;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_ATI_render_texture_rectangle;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_create_context_es2_profile;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_create_context_es_profile;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_depth_float;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_display_color_table;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_extensions_string;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_framebuffer_sRGB;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_make_current_read;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_multisample;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_pbuffer;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_pixel_format;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_pixel_format_packed_float;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_swap_control;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_EXT_swap_control_tear;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_I3D_digital_video_control;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_I3D_gamma;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_I3D_genlock;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_I3D_image_buffer;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_I3D_swap_frame_lock;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_I3D_swap_frame_usage;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_DX_interop;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_DX_interop2;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_copy_image;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_delay_before_swap;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_float_buffer;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_gpu_affinity;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_multisample_coverage;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_present_video;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_render_depth_texture;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_render_texture_rectangle;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_swap_group;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_vertex_array_range;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_video_capture;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_NV_video_output;
+WGLEW_VAR_EXPORT GLboolean __WGLEW_OML_sync_control;
+/* End of the extension flags; the brace below exists only in the GLEW_MX struct form. */
+#ifdef GLEW_MX
+}; /* WGLEWContextStruct */
+#endif /* GLEW_MX */
+
+/* ------------------------------------------------------------------------- */
+
+#ifdef GLEW_MX
+/* Multi-context build: every query goes through the context returned by wglewGetContext(). */
+typedef struct WGLEWContextStruct WGLEWContext;
+GLEWAPI GLenum GLEWAPIENTRY wglewContextInit (WGLEWContext *ctx);
+GLEWAPI GLboolean GLEWAPIENTRY wglewContextIsSupported (const WGLEWContext *ctx, const char *name);
+/* Context-free spellings, so callers use the same names in both build modes. */
+#define wglewInit() wglewContextInit(wglewGetContext())
+#define wglewIsSupported(x) wglewContextIsSupported(wglewGetContext(), x)
+/* Accessors: read a flag (as const GLboolean) or a function pointer from the current context. */
+#define WGLEW_GET_VAR(x) (*(const GLboolean*)&(wglewGetContext()->x))
+#define WGLEW_GET_FUN(x) wglewGetContext()->x
+
+#else /* GLEW_MX */
+/* Single-context build. wglewInit takes (void) so C compilers before C23 see a real prototype. */
+GLEWAPI GLenum GLEWAPIENTRY wglewInit (void);
+GLEWAPI GLboolean GLEWAPIENTRY wglewIsSupported (const char *name);
+/* Accessors: the name is used directly, with no context indirection. */
+#define WGLEW_GET_VAR(x) (*(const GLboolean*)&x)
+#define WGLEW_GET_FUN(x) x
+
+#endif /* GLEW_MX */
+/* Declared in both build modes. */
+GLEWAPI GLboolean GLEWAPIENTRY wglewGetExtension (const char *name);
+
+#ifdef __cplusplus
+}
+#endif
+
+#undef GLEWAPI
+
+#endif /* __wglew_h__ */