diff --git a/README.md b/README.md
index 9f4d90c8c25abc0fe5d3be3d08b99300d9c05cb9..4ffe10c27f328da1e315febadd30a83f7e09c5cc 100644
--- a/README.md
+++ b/README.md
@@ -5,9 +5,9 @@
Introduction
============
-The graph algorithm library running on Kunpeng processors is an acceleration library that provides a rich set of high-level tools for graph algorithms. It is developed based on original APIs of Apache [Spark 3.1.1](https://github.com/apache/spark/tree/v3.1.1). The acceleration library greatly improves the computing power in big data scenarios. Additionally, it provides multiple APIs in addition to the original APIs if the Apache Spark graph library.
+The graph algorithm library running on Kunpeng processors is an acceleration library that provides a rich set of high-level tools for graph algorithms. It is developed based on the original APIs of Apache [Spark 3.3.1](https://github.com/apache/spark/tree/v3.3.1). The acceleration library greatly improves the computing power in big data scenarios. Additionally, it provides multiple APIs beyond the original APIs of the Apache Spark graph library.
-The library provides 16 graph algorithms: triangle count (TC), weak clique enumeration (WCE), maximal clique enumeration (MCE), modualrity, cycle detection (CD), label propagation algorithm (LPA), Louvain, PageRank, Multiple Source shortest path(MSSP), strongly connected components (SCC), K-core decomposition (KCore), breadth-first-search (BFS), ClusteringCoefficient, PersonalizedPageRank, Betweenness and SubgraphMatching. You can find the latest documentation on the project web page. This README file contains only basic setup instructions.
+The library provides 4 graph algorithms: maximal clique enumeration (MCE), multiple source shortest path (MSSP), PageRank and Betweenness. You can find the latest documentation on the project web page. This README file contains only basic setup instructions.
@@ -20,11 +20,11 @@ Building
mvn package
- Obtain "boostkit-graph-acc_2.12-2.2.0-spark3.1.1.jar" from the "Spark-graph-algo-lib/graph-accelerator/target/" directory
+ Obtain "boostkit-graph-acc_2.12-3.0.0-spark3.3.1.jar" from the "Spark-graph-algo-lib/graph-accelerator/target/" directory
- Obtain "boostkit-graph-core_2.12-2.2.0-Spark3.1.1.jar" from the "Spark-graph-algo-lib/graph-core/target/" directory
+ Obtain "boostkit-graph-core_2.12-3.0.0-Spark3.3.1.jar" from the "Spark-graph-algo-lib/graph-core/target/" directory
- Obtain "boostkit-graph-kernel-clinet_2.12-2.2.0-Spark3.1.1.jar" from the "Spark-graph-algo-lib/graph-kernel/target/" directory
+ Obtain "boostkit-graph-kernel-client_2.12-3.0.0-Spark3.3.1.jar" from the "Spark-graph-algo-lib/graph-kernel/target/" directory
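
As a quick usage sketch (not taken from the README; the application name, input path, and driver object below are illustrative assumptions), the built jars can be passed to spark-submit via --jars and the algorithms driven through the ordinary GraphX API, which the library keeps compatible with upstream Spark:

    // Hypothetical driver: load an edge list and run PageRank through the stock GraphX API.
    import org.apache.spark.graphx.GraphLoader
    import org.apache.spark.sql.SparkSession

    object PageRankDemo {
      def main(args: Array[String]): Unit = {
        val spark = SparkSession.builder().appName("boostkit-graph-demo").getOrCreate()
        val sc = spark.sparkContext
        // edge list with one "srcId dstId" pair per line; the path is a placeholder
        val graph = GraphLoader.edgeListFile(sc, "hdfs:///tmp/edges.txt")
        val ranks = graph.pageRank(0.0001).vertices  // run until convergence at tol = 0.0001
        ranks.take(10).foreach(println)
        spark.stop()
      }
    }
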
diff --git a/graph-accelerator/pom.xml b/graph-accelerator/pom.xml
index 5ff708425b3dbef3d9efe4c5292db7e940e3f5ba..81de0a517f9b7b9418df14d95711068e7840d092 100644
--- a/graph-accelerator/pom.xml
+++ b/graph-accelerator/pom.xml
@@ -2,33 +2,35 @@
org.apache.spark
boostkit-graph
- 2.2.0
+ 3.0.0
4.0.0
org.apache.spark.graphx.lib
boostkit-graph-acc_2.12
- 2.2.0
+ 3.0.0
${project.artifactId}
Spark graph algo accelerator
-
+
org.apache.spark
spark-graphx_2.12
- 3.1.1
+ 3.3.1
+ provided
org.apache.spark.graphx.lib
boostkit-graph-kernel-client_2.12
- 2.2.0
+ 3.0.0
${spark.version}
- compile
+ provided
it.unimi.dsi
fastutil
8.3.1
+ provided
org.scalatest
@@ -40,11 +42,13 @@
org.mockito
mockito-core
1.10.19
+ provided
org.apache.spark
spark-mllib_2.12
- 3.1.1
+ 3.3.1
+ provided
diff --git a/graph-accelerator/src/main/scala/org/apache/spark/graphx/GraphOps.scala b/graph-accelerator/src/main/scala/org/apache/spark/graphx/GraphOps.scala
new file mode 100644
index 0000000000000000000000000000000000000000..be2451cb47fddf7998dc121dbeada4d91305f4e2
--- /dev/null
+++ b/graph-accelerator/src/main/scala/org/apache/spark/graphx/GraphOps.scala
@@ -0,0 +1,465 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.spark.graphx
+
+import scala.reflect.ClassTag
+import scala.util.Random
+
+import org.apache.spark.SparkException
+import org.apache.spark.graphx.lib._
+import org.apache.spark.ml.linalg.Vector
+import org.apache.spark.rdd.RDD
+
+/**
+ * Contains additional functionality for [[Graph]]. All operations are expressed in terms of the
+ * efficient GraphX API. This class is implicitly constructed for each Graph object.
+ *
+ * @tparam VD the vertex attribute type
+ * @tparam ED the edge attribute type
+ */
+class GraphOps[VD: ClassTag, ED: ClassTag](graph: Graph[VD, ED]) extends Serializable {
+
+ /** The number of edges in the graph. */
+ @transient lazy val numEdges: Long = graph.edges.count()
+
+ /** The number of vertices in the graph. */
+ @transient lazy val numVertices: Long = graph.vertices.count()
+
+ /**
+ * The in-degree of each vertex in the graph.
+ * @note Vertices with no in-edges are not returned in the resulting RDD.
+ */
+ @transient lazy val inDegrees: VertexRDD[Int] =
+ degreesRDD(EdgeDirection.In).setName("GraphOps.inDegrees")
+
+ /**
+ * The out-degree of each vertex in the graph.
+ * @note Vertices with no out-edges are not returned in the resulting RDD.
+ */
+ @transient lazy val outDegrees: VertexRDD[Int] =
+ degreesRDD(EdgeDirection.Out).setName("GraphOps.outDegrees")
+
+ /**
+ * The degree of each vertex in the graph.
+ * @note Vertices with no edges are not returned in the resulting RDD.
+ */
+ @transient lazy val degrees: VertexRDD[Int] =
+ degreesRDD(EdgeDirection.Either).setName("GraphOps.degrees")
+
+ /**
+ * Computes the neighboring vertex degrees.
+ *
+ * @param edgeDirection the direction along which to collect neighboring vertex attributes
+ */
+ private def degreesRDD(edgeDirection: EdgeDirection): VertexRDD[Int] = {
+ if (edgeDirection == EdgeDirection.In) {
+ DegreeUtil.inDegreesRDD(graph)
+ } else if (edgeDirection == EdgeDirection.Out) {
+ DegreeUtil.outDegreesRDD(graph)
+ } else {
+ DegreeUtil.degreesRDD(graph)
+ }
+ }
+
+
+ /**
+ * Collect the neighbor vertex ids for each vertex.
+ *
+ * @param edgeDirection the direction along which to collect
+ * neighboring vertices
+ *
+ * @return the set of neighboring ids for each vertex
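+ *
+ * @example A minimal usage sketch (illustrative; not part of the upstream Spark docs).
+ * Vertices with no neighbors in the chosen direction map to an empty array:
+ * {{{
+ * val nbrIds = graph.collectNeighborIds(EdgeDirection.Out)
+ * nbrIds.take(5).foreach(println)
+ * }}}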
+ */
+ def collectNeighborIds(edgeDirection: EdgeDirection): VertexRDD[Array[VertexId]] = {
+ val nbrs =
+ if (edgeDirection == EdgeDirection.Either) {
+ graph.aggregateMessages[Array[VertexId]](
+ ctx => { ctx.sendToSrc(Array(ctx.dstId)); ctx.sendToDst(Array(ctx.srcId)) },
+ _ ++ _, TripletFields.None)
+ } else if (edgeDirection == EdgeDirection.Out) {
+ graph.aggregateMessages[Array[VertexId]](
+ ctx => ctx.sendToSrc(Array(ctx.dstId)),
+ _ ++ _, TripletFields.None)
+ } else if (edgeDirection == EdgeDirection.In) {
+ graph.aggregateMessages[Array[VertexId]](
+ ctx => ctx.sendToDst(Array(ctx.srcId)),
+ _ ++ _, TripletFields.None)
+ } else {
+ throw new SparkException("It doesn't make sense to collect neighbor ids without a " +
+ "direction. (EdgeDirection.Both is not supported; use EdgeDirection.Either instead.)")
+ }
+ graph.vertices.leftZipJoin(nbrs) { (vid, vdata, nbrsOpt) =>
+ nbrsOpt.getOrElse(Array.empty[VertexId])
+ }
+ } // end of collectNeighborIds
+
+ /**
+ * Collect the neighbor vertex attributes for each vertex.
+ *
+ * @note This function could be highly inefficient on power-law
+ * graphs where high degree vertices may force a large amount of
+ * information to be collected to a single location.
+ *
+ * @param edgeDirection the direction along which to collect
+ * neighboring vertices
+ *
+ * @return the vertex set of neighboring vertex attributes for each vertex
+ */
+ def collectNeighbors(edgeDirection: EdgeDirection): VertexRDD[Array[(VertexId, VD)]] = {
+ val nbrs = edgeDirection match {
+ case EdgeDirection.Either =>
+ graph.aggregateMessages[Array[(VertexId, VD)]](
+ ctx => {
+ ctx.sendToSrc(Array((ctx.dstId, ctx.dstAttr)))
+ ctx.sendToDst(Array((ctx.srcId, ctx.srcAttr)))
+ },
+ (a, b) => a ++ b, TripletFields.All)
+ case EdgeDirection.In =>
+ graph.aggregateMessages[Array[(VertexId, VD)]](
+ ctx => ctx.sendToDst(Array((ctx.srcId, ctx.srcAttr))),
+ (a, b) => a ++ b, TripletFields.Src)
+ case EdgeDirection.Out =>
+ graph.aggregateMessages[Array[(VertexId, VD)]](
+ ctx => ctx.sendToSrc(Array((ctx.dstId, ctx.dstAttr))),
+ (a, b) => a ++ b, TripletFields.Dst)
+ case EdgeDirection.Both =>
+ throw new SparkException("collectNeighbors does not support EdgeDirection.Both. Use " +
+ "EdgeDirection.Either instead.")
+ }
+ graph.vertices.leftJoin(nbrs) { (vid, vdata, nbrsOpt) =>
+ nbrsOpt.getOrElse(Array.empty[(VertexId, VD)])
+ }
+ } // end of collectNeighbor
+
+ /**
+ * Returns an RDD that contains for each vertex v its local edges,
+ * i.e., the edges that are incident on v, in the user-specified direction.
+ * Warning: note that singleton vertices, those with no edges in the given
+ * direction, will not be part of the return value.
+ *
+ * @note This function could be highly inefficient on power-law
+ * graphs where high degree vertices may force a large amount of
+ * information to be collected to a single location.
+ *
+ * @param edgeDirection the direction along which to collect
+ * the local edges of vertices
+ *
+ * @return the local edges for each vertex
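+ *
+ * @example A minimal usage sketch (illustrative only):
+ * {{{
+ * // for each non-isolated vertex, the edges incident on it in either direction
+ * val localEdges = graph.collectEdges(EdgeDirection.Either)
+ * }}}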
+ */
+ def collectEdges(edgeDirection: EdgeDirection): VertexRDD[Array[Edge[ED]]] = {
+ edgeDirection match {
+ case EdgeDirection.Either =>
+ graph.aggregateMessages[Array[Edge[ED]]](
+ ctx => {
+ ctx.sendToSrc(Array(new Edge(ctx.srcId, ctx.dstId, ctx.attr)))
+ ctx.sendToDst(Array(new Edge(ctx.srcId, ctx.dstId, ctx.attr)))
+ },
+ (a, b) => a ++ b, TripletFields.EdgeOnly)
+ case EdgeDirection.In =>
+ graph.aggregateMessages[Array[Edge[ED]]](
+ ctx => ctx.sendToDst(Array(new Edge(ctx.srcId, ctx.dstId, ctx.attr))),
+ (a, b) => a ++ b, TripletFields.EdgeOnly)
+ case EdgeDirection.Out =>
+ graph.aggregateMessages[Array[Edge[ED]]](
+ ctx => ctx.sendToSrc(Array(new Edge(ctx.srcId, ctx.dstId, ctx.attr))),
+ (a, b) => a ++ b, TripletFields.EdgeOnly)
+ case EdgeDirection.Both =>
+ throw new SparkException("collectEdges does not support EdgeDirection.Both. Use " +
+ "EdgeDirection.Either instead.")
+ }
+ }
+
+ /**
+ * Remove self edges.
+ *
+ * @return a graph with all self edges removed
+ */
+ def removeSelfEdges(): Graph[VD, ED] = {
+ graph.subgraph(epred = e => e.srcId != e.dstId)
+ }
+
+ /**
+ * Join the vertices with an RDD and then apply a function from the
+ * vertex and RDD entry to a new vertex value. The input table
+ * should contain at most one entry for each vertex. If no entry is
+ * provided the map function is skipped and the old value is used.
+ *
+ * @tparam U the type of entry in the table of updates
+ * @param table the table to join with the vertices in the graph.
+ * The table should contain at most one entry for each vertex.
+ * @param mapFunc the function used to compute the new vertex
+ * values. The map function is invoked only for vertices with a
+ * corresponding entry in the table; otherwise the old vertex value
+ * is used.
+ *
+ * @example This function is used to update the vertices with new
+ * values based on external data. For example, we could add the out
+ * degree to each vertex record
+ *
+ * {{{
+ * val rawGraph: Graph[Int, Int] = GraphLoader.edgeListFile(sc, "webgraph")
+ * .mapVertices((_, _) => 0)
+ * val outDeg = rawGraph.outDegrees
+ * val graph = rawGraph.joinVertices[Int](outDeg)
+ * ((_, _, outDeg) => outDeg)
+ * }}}
+ *
+ */
+ def joinVertices[U: ClassTag](table: RDD[(VertexId, U)])(mapFunc: (VertexId, VD, U) => VD)
+ : Graph[VD, ED] = {
+ val uf = (id: VertexId, data: VD, o: Option[U]) => {
+ o match {
+ case Some(u) => mapFunc(id, data, u)
+ case None => data
+ }
+ }
+ graph.outerJoinVertices(table)(uf)
+ }
+
+ /**
+ * Filter the graph by computing some values to filter on, and applying the predicates.
+ *
+ * @param preprocess a function to compute new vertex and edge data before filtering
+ * @param epred edge pred to filter on after preprocess, see more details under
+ * [[org.apache.spark.graphx.Graph#subgraph]]
+ * @param vpred vertex pred to filter on after preprocess, see more details under
+ * [[org.apache.spark.graphx.Graph#subgraph]]
+ * @tparam VD2 vertex type the vpred operates on
+ * @tparam ED2 edge type the epred operates on
+ * @return a subgraph of the original graph, with its data unchanged
+ *
+ * @example This function can be used to filter the graph based on some property, without
+ * changing the vertex and edge values in your program. For example, we could remove the vertices
+ * in a graph with 0 outdegree
+ *
+ * {{{
+ * graph.filter(
+ * graph => {
+ * val degrees: VertexRDD[Int] = graph.outDegrees
+ * graph.outerJoinVertices(degrees) {(vid, data, deg) => deg.getOrElse(0)}
+ * },
+ * vpred = (vid: VertexId, deg:Int) => deg > 0
+ * )
+ * }}}
+ *
+ */
+ def filter[VD2: ClassTag, ED2: ClassTag](
+ preprocess: Graph[VD, ED] => Graph[VD2, ED2],
+ epred: (EdgeTriplet[VD2, ED2]) => Boolean = (x: EdgeTriplet[VD2, ED2]) => true,
+ vpred: (VertexId, VD2) => Boolean = (v: VertexId, d: VD2) => true): Graph[VD, ED] = {
+ graph.mask(preprocess(graph).subgraph(epred, vpred))
+ }
+
+ /**
+ * Picks a random vertex from the graph and returns its ID.
+ */
+ def pickRandomVertex(): VertexId = {
+ val probability = 50.0 / graph.numVertices
+ var found = false
+ var retVal: VertexId = null.asInstanceOf[VertexId]
+ while (!found) {
+ val selectedVertices = graph.vertices.flatMap { vidVvals =>
+ if (Random.nextDouble() < probability) { Some(vidVvals._1) }
+ else { None }
+ }
+ if (selectedVertices.count > 0) {
+ found = true
+ val collectedVertices = selectedVertices.collect()
+ retVal = collectedVertices(Random.nextInt(collectedVertices.length))
+ }
+ }
+ retVal
+ }
+
+ /**
+ * Convert bi-directional edges into uni-directional ones.
+ * Some graph algorithms (e.g., TriangleCount) assume that an input graph
+ * has its edges in canonical direction.
+ * This function rewrites the vertex ids of edges so that srcIds are smaller
+ * than dstIds, and merges the duplicated edges.
+ *
+ * @param mergeFunc the user defined reduce function which should
+ * be commutative and associative and is used to combine the output
+ * of the map phase
+ *
+ * @return the resulting graph with canonical edges
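+ *
+ * @example A minimal sketch (illustrative only), assuming `Int` edge attributes;
+ * opposite edges are merged by keeping the larger weight:
+ * {{{
+ * val canonical = graph.convertToCanonicalEdges((a, b) => math.max(a, b))
+ * }}}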
+ */
+ def convertToCanonicalEdges(
+ mergeFunc: (ED, ED) => ED = (e1, e2) => e1): Graph[VD, ED] = {
+ val newEdges =
+ graph.edges
+ .map {
+ case e if e.srcId < e.dstId => ((e.srcId, e.dstId), e.attr)
+ case e => ((e.dstId, e.srcId), e.attr)
+ }
+ .reduceByKey(mergeFunc)
+ .map(e => new Edge(e._1._1, e._1._2, e._2))
+ Graph(graph.vertices, newEdges)
+ }
+
+ /**
+ * Execute a Pregel-like iterative vertex-parallel abstraction. The
+ * user-defined vertex-program `vprog` is executed in parallel on
+ * each vertex receiving any inbound messages and computing a new
+ * value for the vertex. The `sendMsg` function is then invoked on
+ * all out-edges and is used to compute an optional message to the
+ * destination vertex. The `mergeMsg` function is a commutative
+ * associative function used to combine messages destined to the
+ * same vertex.
+ *
+ * On the first iteration all vertices receive the `initialMsg` and
+ * on subsequent iterations if a vertex does not receive a message
+ * then the vertex-program is not invoked.
+ *
+ * This function iterates until there are no remaining messages, or
+ * for `maxIterations` iterations.
+ *
+ * @tparam A the Pregel message type
+ *
+ * @param initialMsg the message each vertex will receive on the
+ * first iteration
+ *
+ * @param maxIterations the maximum number of iterations to run for
+ *
+ * @param activeDirection the direction of edges incident to a vertex that received a message in
+ * the previous round on which to run `sendMsg`. For example, if this is `EdgeDirection.Out`, only
+ * out-edges of vertices that received a message in the previous round will run.
+ *
+ * @param vprog the user-defined vertex program which runs on each
+ * vertex and receives the inbound message and computes a new vertex
+ * value. On the first iteration the vertex program is invoked on
+ * all vertices and is passed the default message. On subsequent
+ * iterations the vertex program is only invoked on those vertices
+ * that receive messages.
+ *
+ * @param sendMsg a user supplied function that is applied to out
+ * edges of vertices that received messages in the current
+ * iteration
+ *
+ * @param mergeMsg a user supplied function that takes two incoming
+ * messages of type A and merges them into a single message of type
+ * A. ''This function must be commutative and associative and
+ * ideally the size of A should not increase.''
+ *
+ * @return the resulting graph at the end of the computation
+ *
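+ * @example A single-source shortest-paths sketch adapted from the standard GraphX Pregel
+ * illustration (illustrative only; assumes a graph whose edge attribute is a `Double`
+ * weight and a hypothetical source vertex id):
+ * {{{
+ * val sourceId: VertexId = 42L
+ * val init = graph.mapVertices((id, _) =>
+ *   if (id == sourceId) 0.0 else Double.PositiveInfinity)
+ * val sssp = init.pregel(Double.PositiveInfinity)(
+ *   (id, dist, newDist) => math.min(dist, newDist),   // vertex program
+ *   triplet => {                                      // send message along cheaper paths
+ *     if (triplet.srcAttr + triplet.attr < triplet.dstAttr) {
+ *       Iterator((triplet.dstId, triplet.srcAttr + triplet.attr))
+ *     } else {
+ *       Iterator.empty
+ *     }
+ *   },
+ *   (a, b) => math.min(a, b)                          // merge messages
+ * )
+ * }}}
+ *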
+ */
+ def pregel[A: ClassTag](
+ initialMsg: A,
+ maxIterations: Int = Int.MaxValue,
+ activeDirection: EdgeDirection = EdgeDirection.Either)(
+ vprog: (VertexId, VD, A) => VD,
+ sendMsg: EdgeTriplet[VD, ED] => Iterator[(VertexId, A)],
+ mergeMsg: (A, A) => A)
+ : Graph[VD, ED] = {
+ Pregel(graph, initialMsg, maxIterations, activeDirection)(vprog, sendMsg, mergeMsg)
+ }
+
+ /**
+ * Run a dynamic version of PageRank returning a graph with vertex attributes containing the
+ * PageRank and edge attributes containing the normalized edge weight.
+ *
+ * @see [[org.apache.spark.graphx.lib.PageRank$#runUntilConvergence]]
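+ *
+ * @example A minimal usage sketch (illustrative only):
+ * {{{
+ * // iterate until the per-vertex change drops below the tolerance
+ * val ranks = graph.pageRank(tol = 0.0001).vertices
+ * // or run a fixed number of iterations
+ * val staticRanks = graph.staticPageRank(numIter = 20).vertices
+ * }}}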
+ */
+ def pageRank(tol: Double, resetProb: Double = 0.15): Graph[Double, Double] = {
+ PageRank.runUntilConvergence(graph, tol, resetProb)
+ }
+
+
+ /**
+ * Run personalized PageRank for a given vertex, such that all random walks
+ * are started relative to the source node.
+ *
+ * @see [[org.apache.spark.graphx.lib.PageRank$#runUntilConvergenceWithOptions]]
+ */
+ def personalizedPageRank(src: VertexId, tol: Double,
+ resetProb: Double = 0.15): Graph[Double, Double] = {
+ PageRank.runUntilConvergenceWithOptions(graph, tol, resetProb, Some(src))
+ }
+
+ /**
+ * Run parallel personalized PageRank for a given array of source vertices, such
+ * that all random walks are started relative to the source vertices
+ */
+ def staticParallelPersonalizedPageRank(sources: Array[VertexId], numIter: Int,
+ resetProb: Double = 0.15) : Graph[Vector, Double] = {
+ PageRank.runParallelPersonalizedPageRank(graph, numIter, resetProb, sources)
+ }
+
+ /**
+ * Run personalized PageRank for a fixed number of iterations, with
+ * all iterations originating at the source node, returning a graph
+ * with vertex attributes containing the PageRank and edge attributes
+ * containing the normalized edge weight.
+ *
+ * @see [[org.apache.spark.graphx.lib.PageRank$#runWithOptions]]
+ */
+ def staticPersonalizedPageRank(src: VertexId, numIter: Int,
+ resetProb: Double = 0.15): Graph[Double, Double] = {
+ PageRank.runWithOptions(graph, numIter, resetProb, Some(src))
+ }
+
+ /**
+ * Run PageRank for a fixed number of iterations, returning a graph with vertex attributes
+ * containing the PageRank and edge attributes containing the normalized edge weight.
+ *
+ * @see [[org.apache.spark.graphx.lib.PageRank$#run]]
+ */
+ def staticPageRank(numIter: Int, resetProb: Double = 0.15): Graph[Double, Double] = {
+ PageRank.run(graph, numIter, resetProb)
+ }
+
+ /**
+ * Compute the connected component membership of each vertex and return a graph with the vertex
+ * value containing the lowest vertex id in the connected component containing that vertex.
+ *
+ * @see `org.apache.spark.graphx.lib.ConnectedComponents.run`
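+ *
+ * @example A minimal usage sketch (illustrative only):
+ * {{{
+ * // each vertex is labelled with the smallest vertex id in its component
+ * val cc = graph.connectedComponents().vertices
+ * }}}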
+ */
+ def connectedComponents(): Graph[VertexId, ED] = {
+ ConnectedComponents.run(graph)
+ }
+
+ /**
+ * Compute the connected component membership of each vertex and return a graph with the vertex
+ * value containing the lowest vertex id in the connected component containing that vertex.
+ *
+ * @see `org.apache.spark.graphx.lib.ConnectedComponents.run`
+ */
+ def connectedComponents(maxIterations: Int): Graph[VertexId, ED] = {
+ ConnectedComponents.run(graph, maxIterations)
+ }
+
+ /**
+ * Compute the number of triangles passing through each vertex.
+ *
+ * @see [[org.apache.spark.graphx.lib.TriangleCount$#run]]
+ */
+ def triangleCount(): Graph[Int, ED] = {
+ TriangleCount.run(graph)
+ }
+
+ /**
+ * Compute the strongly connected component (SCC) of each vertex and return a graph with the
+ * vertex value containing the lowest vertex id in the SCC containing that vertex.
+ *
+ * @see [[org.apache.spark.graphx.lib.StronglyConnectedComponents$#run]]
+ */
+ def stronglyConnectedComponents(numIter: Int): Graph[VertexId, ED] = {
+ StronglyConnectedComponents.run(graph, numIter)
+ }
+} // end of GraphOps
diff --git a/graph-core/pom.xml b/graph-core/pom.xml
index edc08a899bda35da85fec0f123eaa018f0ac4c36..b143306a995e93cb6f0c499850a43572b1ab4992 100644
--- a/graph-core/pom.xml
+++ b/graph-core/pom.xml
@@ -2,13 +2,13 @@
org.apache.spark
boostkit-graph
- 2.2.0
+ 3.0.0
4.0.0
boostkit-graph-core_2.12
- 2.2.0
+ 3.0.0
${project.artifactId}
Spark graph core
@@ -16,7 +16,8 @@
org.apache.spark
spark-graphx_2.12
- 3.1.1
+ 3.3.1
+ provided
org.scalatest
@@ -24,15 +25,23 @@
3.0.5
test
+
+ com.fasterxml.jackson.core
+ jackson-core
+ 2.13.0
+ provided
+
org.mockito
mockito-core
1.10.19
+ provided
org.apache.spark
spark-mllib_2.12
- 3.1.1
+ 3.3.1
+ provided
diff --git a/graph-core/src/main/scala/spark/mllib/feature/Word2Vec.scala b/graph-core/src/main/scala/spark/mllib/feature/Word2Vec.scala
index ae3ba981d0d4911dd9d7cbe3e4c6037fad9d7ed4..8212d90809b761e1d6c59497ffb28252c53a58c5 100644
--- a/graph-core/src/main/scala/spark/mllib/feature/Word2Vec.scala
+++ b/graph-core/src/main/scala/spark/mllib/feature/Word2Vec.scala
@@ -705,7 +705,7 @@ class Word2VecModel private[spark](
}
@Since("1.4.0")
-object Word2VecModel extends Loader[Word2VecModel] {
+object Word2VecModel extends Loader[Word2VecModel]{
private def buildWordIndex(model: Map[String, Array[Float]]): Map[String, Int] = {
model.keys.zipWithIndex.toMap
@@ -746,7 +746,7 @@ object Word2VecModel extends Loader[Word2VecModel] {
def save(sc: SparkContext, path: String, model: Map[String, Array[Float]]): Unit = {
val spark = SparkSession.builder().sparkContext(sc).getOrCreate()
- val vectorSize = model.values.head.length
+ val vectorSize = model.values.toArray.head.length
val numWords = model.size
val metadata = compact(render(
("class" -> classNameV1_0) ~ ("version" -> formatVersionV1_0) ~
diff --git a/graph-kernel/pom.xml b/graph-kernel/pom.xml
index 6bd7513a4bb3fb1e8860f5148e90511032af70c2..b6f2275060d2ca01999d451a6057fed060db9ec6 100644
--- a/graph-kernel/pom.xml
+++ b/graph-kernel/pom.xml
@@ -2,15 +2,15 @@
org.apache.spark
boostkit-graph
- 2.2.0
+ 3.0.0
4.0.0
org.apache.spark.graphx.lib
boostkit-graph-kernel-client_2.12
- 2.2.0
+ 3.0.0
${project.artifactId}
Spark graph algo client
- 2020
+ 2022
1.8
1.8
@@ -21,7 +21,8 @@
org.apache.spark
spark-graphx_2.12
- 3.1.1
+ 3.3.1
+ provided
diff --git a/pom.xml b/pom.xml
index 112de3a722c07868bd1caa6d4cf06a4661332f38..c0e87b01ed15372df04bd76c2463461d005bd739 100644
--- a/pom.xml
+++ b/pom.xml
@@ -3,16 +3,16 @@
4.0.0
org.apache.spark
boostkit-graph
- 2.2.0
+ 3.0.0
${project.artifactId}
Spark graph algo
- 2020
+ 2022
pom
1.8
1.8
UTF-8
- spark3.1.1
+ spark3.3.1
diff --git a/scalastyle-config.xml b/scalastyle-config.xml
index 866e93869eeb0e8380e7360d17e2b51964a58103..10b2bcd2cf90c504dec2949628f0687e0e694503 100644
--- a/scalastyle-config.xml
+++ b/scalastyle-config.xml
[The hunks in scalastyle-config.xml re-indent the standard Scalastyle check definitions (maximum line length, method length and count, parameter number, whitespace around tokens, import ordering, and the banned-pattern checks for println, Class.forName, Await.result, Await.ready, Runtime.getRuntime.addShutdownHook, mutable.SynchronizedBuffer, JavaConversions, org.apache.commons.lang and FunSuite), apparently without changing their parameters. The one substantive change is the extractOpt check message, which now recommends Utils.jsonOption(x).map(.extract[T]) over .extractOpt[T]. The XML markup of these checks is not reproduced here.]