You can simply use case classes to prepare a sample data set. (You can get the DataFrames from hiveContext.sql as well; see the sketch at the end of this answer.)
import org.apache.spark.sql.functions.col

// Case classes describing the two sample data sets
case class Person(name: String, age: Int, personid: Int)
case class Profile(name: String, personid: Int, profileDescription: String)

// Build the sample DataFrames (sqlContext is pre-defined in spark-shell)
val df1 = sqlContext.createDataFrame(
  Person("Bindu", 20, 2)
    :: Person("Raphel", 25, 5)
    :: Person("Ram", 40, 9) :: Nil)

val df2 = sqlContext.createDataFrame(
  Profile("Spark", 2, "SparkSQLMaster")
    :: Profile("Spark", 5, "SparkGuru")
    :: Profile("Spark", 9, "DevHunter") :: Nil)
// Alias both DataFrames so the join condition and the select can refer to
// columns from each side unambiguously
val df_asPerson = df1.as("dfperson")
val df_asProfile = df2.as("dfprofile")

// Inner join on personid
val joined_df = df_asPerson.join(
  df_asProfile,
  col("dfperson.personid") === col("dfprofile.personid"),
  "inner")
// Select the columns of interest, qualified by the aliases
joined_df.select(
  col("dfperson.name"),
  col("dfperson.age"),
  col("dfprofile.name"),
  col("dfprofile.profileDescription"))
  .show
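
With the sample data above, the show call should print something like this (row order may vary):

+------+---+-----+------------------+
|  name|age| name|profileDescription|
+------+---+-----+------------------+
| Bindu| 20|Spark|    SparkSQLMaster|
|Raphel| 25|Spark|         SparkGuru|
|   Ram| 40|Spark|         DevHunter|
+------+---+-----+------------------+

As mentioned at the top, the two DataFrames could just as well come from hiveContext.sql instead of case classes. A minimal sketch, assuming you have a HiveContext and hypothetical Hive tables person_table and profile_table with matching columns:

// Hypothetical table names; adjust to your own schema
val hiveContext = new org.apache.spark.sql.hive.HiveContext(sc)
val df1 = hiveContext.sql("SELECT name, age, personid FROM person_table")
val df2 = hiveContext.sql("SELECT name, personid, profileDescription FROM profile_table")

The rest of the join code stays exactly the same.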