Another alternative:
Load the test data:
// Test fixture: a pipe-delimited table embedded as a multi-line string.
// `stripMargin` removes the leading `|` of every line; the remaining pipes act
// as column separators.
val data =
  """
    |transaction_status|amount|category|email_id |unique_id|acct_no|ciskey
    |posted |116.26|Family |[email protected]|12345678 |51663 |47626220
    |posted |116.26|Family |[email protected]|12345678 |51663 |47626221
    |posted |116.26|Family |[email protected]|12345678 |51663 |47626222
  """.stripMargin

// Convert each table row into a CSV line: split on `|`, trim every cell and
// re-join the cells with commas.
val stringDS = data
  // Split on "\n" or "\r\n" rather than System.lineSeparator(): the literal's
  // line endings come from the source file, not from the OS running the job,
  // so the platform separator may not match them (e.g. on Windows).
  .split("\\r?\\n")
  // Drop the blank first/last lines produced by the triple-quoted literal.
  .filter(_.trim.nonEmpty)
  .map(_.split("\\|").map(_.trim).mkString(","))
  .toSeq
  .toDS()

// Parse the CSV lines into a DataFrame: the first line is the header, column
// types are inferred, and the literal text "null" is read as SQL NULL.
val df = spark.read
  .option("sep", ",")
  .option("inferSchema", "true")
  .option("header", "true")
  .option("nullValue", "null")
  .csv(stringDS)

df.show(false)
df.printSchema()
/**
* +------------------+------+--------+--------------+---------+-------+--------+
* |transaction_status|amount|category|email_id |unique_id|acct_no|ciskey |
* +------------------+------+--------+--------------+---------+-------+--------+
* |posted |116.26|Family |[email protected]|12345678 |51663 |47626220|
* |posted |116.26|Family |[email protected]|12345678 |51663 |47626221|
* |posted |116.26|Family |[email protected]|12345678 |51663 |47626222|
* +------------------+------+--------+--------------+---------+-------+--------+
*
* root
* |-- transaction_status: string (nullable = true)
* |-- amount: double (nullable = true)
* |-- category: string (nullable = true)
* |-- email_id: string (nullable = true)
* |-- unique_id: integer (nullable = true)
* |-- acct_no: integer (nullable = true)
* |-- ciskey: integer (nullable = true)
*/
Create the required JSON:
// Group by every column except `ciskey`, so that rows differing only in
// `ciskey` collapse into one record.
val groupBy = df.columns.filter(_ != "ciskey")

df.groupBy(groupBy.map(col): _*)
  // Collect all ciskeys of the group into an array. collect_list gives no
  // ordering guarantee after the shuffle, so the array is sorted to make both
  // the `customers` order and the top-level `ciskey` reproducible across runs.
  .agg(expr("sort_array(collect_list(ciskey))").as("accounts"))
  // Top-level `ciskey` = first (smallest) collected key; element_at is 1-based.
  .withColumn("ciskey", element_at($"accounts", 1))
  // Replace the plain array with the nested structure
  //   accounts: { acct_no, customers: [ { ciskey_no, ciskey_val } ] }
  // `accounts` inside TRANSFORM still refers to the array column built above.
  .withColumn("accounts",
    struct(
      $"acct_no",
      expr("TRANSFORM(accounts, x -> named_struct('ciskey_no', x, 'ciskey_val', 'IND'))")
        .as("customers")))
  // One JSON document per grouped row.
  .toJSON
  .show(false)
/**
* +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
* |value |
* +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
* |{"transaction_status":"posted","amount":116.26,"category":"Family","email_id":"[email protected]","unique_id":12345678,"acct_no":51663,"accounts":{"acct_no":51663,"customers":[{"ciskey_no":47626220,"ciskey_val":"IND"},{"ciskey_no":47626221,"ciskey_val":"IND"},{"ciskey_no":47626222,"ciskey_val":"IND"}]},"ciskey":47626220}|
* +-------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------------+
*/
Resulting JSON (pretty-printed):
{
"transaction_status": "posted",
"amount": 116.26,
"category": "Family",
"email_id": "[email protected]",
"unique_id": 12345678,
"acct_no": 51663,
"accounts": {
"acct_no": 51663,
"customers": [{
"ciskey_no": 47626220,
"ciskey_val": "IND"
}, {
"ciskey_no": 47626221,
"ciskey_val": "IND"
}, {
"ciskey_no": 47626222,
"ciskey_val": "IND"
}]
},
"ciskey": 47626220
}