项目作者: keks51

项目描述 :
visualizing spark plan as UML diagram
高级语言: Scala
项目地址: git://github.com/keks51/spark_plan_as_uml.git
创建时间: 2020-12-22T17:48:53Z

开源协议:Apache License 2.0


Visualizing Spark Plan as UML diagram

Maven Central

A library for drawing a Spark logical plan as a UML diagram, using PlantUML.


Tested with Spark 2.4.0.\
For lower Spark versions, some code changes need to be applied.\
Not tested with Datasets.


  1. <dependency>
  2. <groupId>io.github.keks51</groupId>
  3. <artifactId>spark-plan-as-uml</artifactId>
  4. <version>1.0.8</version>
  5. </dependency>


Room code example

  1. import com.keks.plan.builder.{JsonDiagramBuilder, PlanUmlDiagramBuilder}
  2. import com.keks.plan.parser.DefaultExpressionParser
  3. import com.keks.plan.write.{LocalFilePlanSaver, UmlPlanSaver}
  4. import org.apache.spark.sql.SparkSession
  5. import org.apache.spark.sql.functions._
  6. import org.apache.spark.sql.types.IntegerType
  7. val spark: SparkSession = _
  8. import spark.implicits._
  9. val userDF = Seq(("a", "b")).toDF("user_id", "user_name").as("USER_TABLE")
  10. val phoneDF = Seq(("a", "b", "c")).toDF("phone_id", "user_id", "phone_number").as("PHONE_TABLE")
  11. val roomDF = Seq(("a", "b", "c")).toDF("room_id", "phone_id", "room_number").as("ROOM_TABLE")
  12. // find all rooms where 'alex' users live with phone_number starting with '+7952'
  13. val alexUsers = userDF.filter(lower(col("user_name")) === "alex")
  14. val filteredPhones = phoneDF.filter(col("phone_number").startsWith("+7952"))
  15. val result = alexUsers
  16. .join(filteredPhones, Seq("user_id"), "inner")
  17. .join(roomDF, Seq("phone_id"))
  18. .select("room_id", "room_number")


  1. import com.keks.plan.implicits._
  2. result.printPlan(planParser = new SparkLogicalRelationParser(new DefaultExpressionParser()),
  3. builder = new PlanUmlDiagramBuilder(),
  4. entityName = s"rooms",
  5. reportDescription = "find all rooms where 'alex' users live with phone_number starting with '+7952'",
  6. savePath = "examples",
  7. saver = new UmlPlanSaver())

Alt text


  1. import com.keks.plan.implicits._
  2. result.printPlan(planParser = new SparkLogicalRelationParser(new DefaultExpressionParser()),
  3. builder = new JsonDiagramBuilder(),
  4. entityName = s"rooms",
  5. reportDescription = "find all rooms where 'alex' users live with phone_number starting with '+7952'",
  6. savePath = "examples",
  7. saver = new LocalFilePlanSaver())
  1. {
  2. "entityName": "rooms",
  3. "reportDescription": "find all rooms where 'alex' users live with phone_number starting with '+7952'",
  4. "edges": [
  5. {"from": 1, "to": 3}, {"from": 3, "to": 5}, {"from": 3, "to": 14}, {"from": 5, "to": 6},
  6. {"from": 5, "to": 10}, {"from": 6, "to": 7}, {"from": 10, "to": 11}
  7. ],
  8. "nodes": [
  9. {
  10. "id": 1,
  11. "name": "SELECT",
  12. "desc": "room_id\nroom_number"
  13. },
  14. {
  15. "id": 3,
  16. "name": "JOIN",
  17. "desc": "INNER\nPHONE_TABLE.phone_id == ROOM_TABLE.phone_id"
  18. },
  19. {
  20. "id": 5,
  21. "name": "JOIN",
  22. "desc": "INNER\nUSER_TABLE.user_id == PHONE_TABLE.user_id"
  23. },
  24. {
  25. "id": 6,
  26. "name": "FILTER",
  27. "desc": "Lower[user_name] == alex"
  28. },
  29. {
  30. "id": 7,
  31. "name": "NAMED_SOURCE_TABLE",
  32. "desc": "SourceType: GENERATED TABLE\nTableName: USER_TABLE\nuser_id: StringType\nuser_name: StringType"
  33. },
  34. {
  35. "id": 10,
  36. "name": "FILTER",
  37. "desc": "phone_number StartsWith[+7952]"
  38. },
  39. {
  40. "id": 11,
  41. "name": "NAMED_SOURCE_TABLE",
  42. "desc": "SourceType: GENERATED TABLE\nTableName: PHONE_TABLE\nphone_id: StringType\nuser_id: StringType\nphone_number: StringType"
  43. },
  44. {
  45. "id": 14,
  46. "name": "NAMED_SOURCE_TABLE",
  47. "desc": "SourceType: GENERATED TABLE\nTableName: ROOM_TABLE\nroom_id: StringType\nphone_id: StringType\nroom_number: StringType"
  48. }
  49. ]
  50. }

Use the pretty-print helpers

  1. .printAsUml(
  2. entityName = "rooms",
  3. "find all rooms where 'alex' users live with phone_number starting with '+7952'",
  4. savePath = "examples")
  5. .printAsJson(
  6. entityName = "rooms",
  7. "find all rooms where 'alex' users live with phone_number starting with '+7952'",
  8. savePath = "examples")


If DefaultExpressionParser throws a not-implemented error or behaves incorrectly, you can extend the class and
override its logic.