Skip to content
Projects
Groups
Snippets
Help
Loading...
Help
Support
Keyboard shortcuts
?
Submit feedback
Contribute to GitLab
Sign in
Toggle navigation
I
INMOST
Project overview
Project overview
Details
Activity
Releases
Repository
Repository
Files
Commits
Branches
Tags
Contributors
Graph
Compare
Issues
0
Issues
0
List
Boards
Labels
Milestones
Merge Requests
0
Merge Requests
0
CI / CD
CI / CD
Pipelines
Jobs
Schedules
Analytics
Analytics
CI / CD
Repository
Value Stream
Wiki
Wiki
Snippets
Snippets
Members
Members
Collapse sidebar
Close sidebar
Activity
Graph
Create a new issue
Jobs
Commits
Issue Boards
Open sidebar
Kirill Terekhov
INMOST
Commits
f7249beb
Commit
f7249beb
authored
May 07, 2018
by
Kirill Terekhov
Browse files
Options
Browse Files
Download
Email Patches
Plain Diff
kdtree in K-means clustering
Intended to speed up the search for the closest cluster, but it seems to be slower in practice
parent
34663ad7
Changes
1
Hide whitespace changes
Inline
Side-by-side
Showing
1 changed file
with
130 additions
and
4 deletions
+130
-4
Examples/GridTools/kmeans.cpp
Examples/GridTools/kmeans.cpp
+130
-4
No files found.
Examples/GridTools/kmeans.cpp
View file @
f7249beb
...
...
@@ -5,8 +5,124 @@ using namespace INMOST;
//todo: want to separate all faces into those that share edges
//see todo: inside text
// Simple k-d tree over 3D points, used to look up the nearest point
// (e.g. nearest cluster center) for a query position.
//
// Usage: resize(n); build(coords); closest(p); ... clear();
//
// The root node owns the entry array (allocated in resize()). Child nodes
// created by build_tree() only alias sub-ranges of that array and never
// free it; ownership is tracked with owns_set.
struct kdtree
{
	// One stored point together with its index in the original input array.
	struct entry
	{
		double v[3];
		int pos;
	};
	// Strict weak ordering of entries by a single coordinate `comp` (0..2).
	struct compare
	{
		int comp;
		compare(int c) : comp(c) {}
		compare(const compare & b) : comp(b.comp) {}
		bool operator()(const entry & a, const entry & b) const
		{
			return a.v[comp] < b.v[comp];
		}
	};

	entry * set;    // points of this subtree (owned only when owns_set is true)
	kdtree * l, * r; // children; both NULL for a leaf or a tree that is not built
	int size;       // number of entries reachable through `set`
	double split;   // splitting coordinate value, valid when children exist
	bool owns_set;  // true when `set` was allocated by resize() on this node

	kdtree() : set(NULL), l(NULL), r(NULL), size(0), split(0.0), owns_set(false) {}

	// Releases children and, for the owning node, the entry array.
	~kdtree()
	{
		clear();
	}

	// Drops any previous content and allocates storage for set_size points.
	// A non-positive set_size leaves the tree empty.
	void resize(int set_size)
	{
		clear();
		if (set_size > 0)
		{
			set = new entry[set_size];
			size = set_size;
			owns_set = true;
		}
	}

	// Returns the tree to the empty state. Safe to call repeatedly.
	void clear()
	{
		drop_children();
		if (owns_set)
			delete [] set;
		set = NULL;
		size = 0;
		owns_set = false;
	}

	// Recursively partitions the entries of this node, splitting along
	// coordinate `comp` at the median and cycling the coordinate on each level.
	void build_tree(int comp)
	{
		drop_children();
		if (size > 1)
		{
			std::sort(set, set + size, compare(comp));
			const int middle = size / 2;
			// children alias halves of the parent's array, they do not own it
			l = new kdtree;
			l->set = set;
			l->size = middle;
			r = new kdtree;
			r->set = set + middle;
			r->size = size - middle;
			split = set[middle].v[comp];
			const int next = (comp + 1) % 3;
			l->build_tree(next);
			r->build_tree(next);
		}
	}

	// Fills the tree from `coords`, which must hold 3*size doubles laid out
	// as x0,y0,z0,x1,y1,z1,...; entry k remembers k as its position.
	void build(const double * coords)
	{
		for (int k = 0; k < size; ++k)
		{
			for (int d = 0; d < 3; ++d)
				set[k].v[d] = coords[k * 3 + d];
			set[k].pos = k;
		}
		build_tree(0);
	}

	// Recursive nearest-neighbour search.
	//  p        - query point
	//  pos      - in/out: index of the best point found so far
	//  min_dist - in/out: SQUARED distance to the best point found so far
	//  comp     - coordinate this node was split along
	void closest_tree(const double p[3], int & pos, double & min_dist, int comp) const
	{
		if (size == 0)
		{
			std::cout << "size is " << size << std::endl;
		}
		else if (l == NULL || r == NULL)
		{
			// leaf (size == 1), or a node that was never split: scan linearly
			for (int k = 0; k < size; ++k)
			{
				const double dx = set[k].v[0] - p[0];
				const double dy = set[k].v[1] - p[1];
				const double dz = set[k].v[2] - p[2];
				const double dist = dx * dx + dy * dy + dz * dz;
				if (dist < min_dist)
				{
					min_dist = dist;
					pos = set[k].pos;
				}
			}
		}
		else
		{
			const int next = (comp + 1) % 3;
			const bool left_first = p[comp] < split;
			const kdtree * near_side = left_first ? l : r;
			const kdtree * far_side = left_first ? r : l;
			near_side->closest_tree(p, pos, min_dist, next);
			// min_dist is a squared distance, so the distance to the
			// splitting plane has to be squared before comparing; the far
			// side can only help if the plane is within the current radius.
			const double to_plane = p[comp] - split;
			if (to_plane * to_plane <= min_dist)
				far_side->closest_tree(p, pos, min_dist, next);
		}
	}

	// Returns the original index (entry::pos) of the stored point nearest to
	// p, or -1 when the tree holds no points.
	int closest(const double p[3]) const
	{
		double min_dist = std::numeric_limits<double>::max();
		int ret = -1;
		closest_tree(p, ret, min_dist, 0);
		return ret;
	}

private:
	// Copying would duplicate raw owning pointers; intentionally not implemented.
	kdtree(const kdtree &);
	kdtree & operator=(const kdtree &);

	// Deletes both subtrees (they never own the entry array).
	void drop_children()
	{
		delete l;
		l = NULL;
		delete r;
		r = NULL;
	}
};
int
main
(
int
argc
,
char
**
argv
)
{
...
...
@@ -149,6 +265,8 @@ int main(int argc, char ** argv)
std
::
vector
<
double
>
cluster_center_tmp
(
K
*
3
);
std
::
vector
<
int
>
cluster_npoints_tmp
(
K
);
#endif
//kdtree tree;
//tree.resize(K);
if
(
m
.
GetProcessorRank
()
==
0
)
...
...
@@ -209,8 +327,12 @@ int main(int argc, char ** argv)
double
t
=
Timer
();
while
(
true
)
{
int
changed
=
0
;
//tree.build(&cluster_center[0]);
// associates each point to the nearest center
#if defined(USE_OMP)
#pragma omp parallel for reduction(+:changed)
...
...
@@ -219,6 +341,7 @@ int main(int argc, char ** argv)
{
int
id_old_cluster
=
points_cluster
[
i
];
int
id_nearest_center
=
-
1
;
double
lmin
=
1.0e+100
;
for
(
int
j
=
0
;
j
<
K
;
++
j
)
...
...
@@ -236,6 +359,8 @@ int main(int argc, char ** argv)
}
}
//id_nearest_center = tree.closest(&points_center[i*3]);
if
(
id_old_cluster
!=
id_nearest_center
)
{
points_cluster
[
i
]
=
id_nearest_center
;
...
...
@@ -245,7 +370,7 @@ int main(int argc, char ** argv)
#if defined(USE_MPI)
int
tmp
=
changed
;
MPI_Allreduce
(
&
tmp
,
&
changed
,
1
,
MPI_
DOUBLE
,
MPI_SUM
,
MPI_COMM_WORLD
);
MPI_Allreduce
(
&
tmp
,
&
changed
,
1
,
MPI_
INT
,
MPI_SUM
,
MPI_COMM_WORLD
);
#endif
if
(
changed
==
0
||
iter
>=
max_iterations
)
...
...
@@ -303,9 +428,10 @@ int main(int argc, char ** argv)
}
if
(
m
.
GetProcessorRank
()
==
0
)
std
::
cout
<<
"Iteration "
<<
iter
<<
std
::
endl
;
std
::
cout
<<
"Iteration "
<<
iter
<<
" changed "
<<
changed
<<
std
::
endl
;
iter
++
;
}
//tree.clear();
std
::
cout
<<
"Clustering in "
<<
Timer
()
-
t
<<
" secs "
<<
std
::
endl
;
#if defined(USE_MPI)
if
(
balance
)
...
...
Write
Preview
Markdown
is supported
0%
Try again
or
attach a new file
Attach a file
Cancel
You are about to add
0
people
to the discussion. Proceed with caution.
Finish editing this message first!
Cancel
Please
register
or
sign in
to comment