Snippet 博客主题

Spark实例-操作Hive数据

本文于1143天之前发表。

Spark操作Hive数据库,实现数据表创建,数据加载,以及数据查询。
实例代码如下:

1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
package com.spark.sql

import org.apache.spark.sql.hive.HiveContext
import org.apache.spark.{SparkConf, SparkContext}

/**
 * Spark-on-Hive example: rebuilds two Hive tables from local text files, joins
 * them, and persists and prints the students whose score is above 80.
 *
 * Steps performed by `main`:
 *  1. Drop and recreate `STUDENT_INFO(NAME STRING, AGE INT)` and load it from a local file.
 *  1. Drop and recreate `STUDENT_SCORE(NAME STRING, SCORE INT)` and load it from a local file.
 *  1. Inner-join both tables on `NAME`, keeping rows with `SCORE > 80`.
 *  1. Save the result as the Hive table `GOOD_STUDENT`, read it back and print every row.
 *
 * The entry point is an explicit `main` method rather than `extends App`: with
 * `App` the whole body runs through delayed initialization, which Spark's
 * documentation warns may not work correctly.
 */
object HiveDataSource {

  /** Local (driver-side) file loaded into `STUDENT_INFO`. */
  private val StudentInfoPath = "/home/yangql/spark-study/sql/student_info.txt"

  /** Local (driver-side) file loaded into `STUDENT_SCORE`. */
  private val StudentScorePath = "/home/yangql/spark-study/sql/student_score.txt"

  /**
   * Program entry point.
   *
   * Only the application name is set on the `SparkConf` here; any other
   * settings have to come from outside this code.
   *
   * @param args command-line arguments (not used)
   */
  def main(args: Array[String]): Unit = {
    val conf = new SparkConf()
      .setAppName("HiveDataSource")
    val sc = new SparkContext(conf)
    try {
      run(new HiveContext(sc))
    } finally {
      // Stop the context even when a statement above fails.
      sc.stop()
    }
  }

  /**
   * Runs the table setup, the join query, and the final print.
   *
   * @param hiveContext context used to execute every HiveQL statement
   */
  private def run(hiveContext: HiveContext): Unit = {
    // Recreate the STUDENT_INFO table from scratch.
    hiveContext.sql("DROP TABLE IF EXISTS STUDENT_INFO")
    // Create the table only if it does not exist.
    hiveContext.sql("CREATE TABLE IF NOT EXISTS STUDENT_INFO(NAME STRING,AGE INT)")
    // Import the data.
    loadLocalFile(hiveContext, StudentInfoPath, "STUDENT_INFO")

    // Recreate the STUDENT_SCORE table from scratch.
    hiveContext.sql("DROP TABLE IF EXISTS STUDENT_SCORE")
    // Create the table only if it does not exist.
    hiveContext.sql("CREATE TABLE IF NOT EXISTS STUDENT_SCORE (NAME STRING,SCORE INT)")
    // Import the data.
    loadLocalFile(hiveContext, StudentScorePath, "STUDENT_SCORE")

    // Select the students scoring above 80 and store them in GOOD_STUDENT.
    val goodStudentsDF = hiveContext.sql(
      "SELECT T1.NAME,T1.AGE,T2.SCORE " +
        "FROM student_info T1 " +
        "INNER JOIN student_SCORE T2 " +
        "ON T1.NAME=T2.NAME " +
        "WHERE T2.SCORE>80")
    // Drop any previous result first so saveAsTable starts from a clean slate.
    hiveContext.sql("DROP TABLE IF EXISTS GOOD_STUDENT")
    // Saved through the DataFrameWriter (`write`) rather than calling
    // saveAsTable directly on the DataFrame.
    goodStudentsDF.write.saveAsTable("GOOD_STUDENT")

    // Read the saved table back to the driver and print each row.
    hiveContext.table("GOOD_STUDENT").collect().foreach(row => println(row))
  }

  /**
   * Issues `LOAD DATA LOCAL INPATH '<path>' INTO TABLE <table>`.
   *
   * @param hiveContext context used to execute the statement
   * @param path        path of the file to load, as written into the statement
   * @param table       name of the target Hive table
   */
  private def loadLocalFile(hiveContext: HiveContext, path: String, table: String): Unit = {
    hiveContext.sql(s"LOAD DATA LOCAL INPATH '$path' INTO TABLE $table")
  }
}