http://stackoverflow.com/questions/25697014/efficient-nearest-neighbour-search-in-scala
1. 使用Spark RDD中的:
top(num:Int) ---- 前num个最大
takeOrdered(num:Int) ---- 前num个最小
https://spark.apache.org/docs/0.8.1/api/core/org/apache/spark/rdd/RDD.html
2. 使用Ordering
case class Coord(x: Double, y: Double) {
def dist(c: Coord) = Math.sqrt(Math.pow(x - c.x, 2) + Math.pow(y - c.y, 2))
}
/**
 * An [[Ordering]] over `Coord` values that ranks them by their distance
 * (as computed by `Coord.dist`) to a fixed reference point: the coordinate
 * nearer to the reference point sorts first.
 *
 * @param x the reference point that distances are measured to
 */
class CoordOrdering(x: Coord) extends Ordering[Coord] {

  /**
   * Compares two coordinates by their distance to the reference point.
   *
   * @return a negative value if `a` is nearer than `b`, a positive value if
   *         it is farther, and zero if both are equally distant
   */
  override def compare(a: Coord, b: Coord): Int = {
    val distanceA = a.dist(x)
    val distanceB = b.dist(x)
    java.lang.Double.compare(distanceA, distanceB)
  }
}
/**
 * Returns the `n` smallest elements of `xs` under `ord`, in ascending order.
 *
 * Note that, despite its name, this selects the elements that sort ''first''
 * under `ord`; pass a reversed ordering to obtain the largest elements.
 *
 * The input is scanned once while a sorted buffer of at most `n` candidates
 * is maintained, so the whole input is never sorted.
 *
 *  - If `xs` has at most `n` elements, the result is all of `xs`, sorted.
 *  - If `n <= 0`, the result is empty.
 *  - An element that compares equal to the current n-th smallest candidate
 *    replaces it, so at the cut-off the element occurring later in `xs` wins.
 *
 * `ord` is assumed to be a consistent total order.
 *
 * @param xs  the elements to select from
 * @param n   the maximum number of elements to return
 * @param ord the ordering that defines "smallest"
 * @tparam T  the element type
 * @return up to `n` elements of `xs`, sorted ascending by `ord`
 */
def top[T](xs: Seq[T], n: Int)(implicit ord: Ordering[T]): Seq[T] = {
  // `best` is a sorted sequence holding the current candidates. Returns `best`
  // with `e` inserted at its sorted position and the last (largest) candidate
  // dropped, or `best` unmodified when `e` cannot enter the result.
  def insert(best: Seq[T], e: T): Seq[T] =
    if (best.lastOption.forall(ord.gt(e, _))) {
      // Fast path: `e` is strictly greater than every candidate, so inserting
      // and truncating would give back `best` unchanged. An empty `best` can
      // only occur here when n <= 0, in which case the result stays empty.
      best
    } else {
      // Insert before the first candidate that is not less than `e`, which
      // places `e` ahead of any candidates it ties with.
      val (smaller, rest) = best.span(c => ord.lt(c, e))
      (smaller ++ (e +: rest)).take(n)
    }

  // Seed the buffer with the first n elements, then stream in the remainder.
  val seed = xs.take(n).sorted
  xs.drop(n).foldLeft(seed)(insert)
}
// Demo data: 250000 points with both coordinates drawn from [0, 5).
val grid = for (_ <- 1 to 250000) yield Coord(5 * Math.random(), 5 * Math.random())

// The query point, drawn from the same square.
val x = Coord(5 * Math.random(), 5 * Math.random())

// The three points of `grid` that sort first when ordered by distance to `x`.
top(xs = grid, n = 3)(new CoordOrdering(x))
=========================================================================================
scala> val grid = (1 to 250000).map { _ => Coord(Math.random * 5, Math.random * 5) }
grid: scala.collection.immutable.IndexedSeq[Coord] = Vector(Coord(0.148285617037
4025,0.35673723872494234), Coord(4.12828875570408,4.172828738765978), Coord(4.08
4800100061857,2.4739841867739654), Coord(3.222745588732257,4.191229620411388), C
oord(3.944913340604405,0.5406655323589682), Coord(1.2070881268949218,0.704577399
4573445), Coord(3.7214982831073886,3.735252114090027), Coord(2.891867212320584,3
.1507617575845313), Coord(2.5591769145542376,2.5518872306493106), Coord(4.305062
655795423,0.28892953316559056), Coord(2.4231719978833395,4.7278342661842885), Co
ord(0.2796272401579608,4.4414117072564), Coord(3.7717801951955012,0.821161227367
3746), Coord(4.917774621754089,2.2591306162265723), Coord(1.3041513536208722,0.4
8129187933258377), Coord(3.5491079544916526,1.571186944826074), Coord(4.04744...
scala> val x = Coord(Math.random * 5, Math.random * 5)
x: Coord = Coord(0.6871834970151458,2.777292905435343)
scala> top(grid, 3)(new CoordOrdering(x))
res0: Seq[Coord] = Vector(Coord(0.6793569897057183,2.7762919780629556), Coord(0.
6953108562088722,2.774053195907583), Coord(0.6952823811188164,2.7813641015518433
))
评论