南强小屋 Design By 杰米
如下所示:
import java.text.{DecimalFormat, DecimalFormatSymbols}
import java.util.Locale

import com.alibaba.fastjson.JSON
import com.donews.data.AppConfig
import com.typesafe.config.ConfigFactory
import org.apache.spark.sql.functions.{col, lit}
import org.apache.spark.sql.types.{StructField, StructType}
import org.apache.spark.sql.{Row, SaveMode, DataFrame, SQLContext}
import org.apache.spark.{SparkConf, SparkContext}
import org.slf4j.LoggerFactory

/**
 * Created by silentwolf on 2016/6/3.
 *
 * One row of the user-tag result table: a user id (`SUUID`) followed by one
 * Float weight per tag (gender, age bucket, interest category).
 *
 * The field order is significant: it is the column order of the DataFrame
 * derived from this class and therefore of the rows written by
 * [[UserTagTable.main]].
 */
case class UserTag(SUUID: String,
                   MAN: Float,
                   WOMAN: Float,
                   AGE10_19: Float,
                   AGE20_29: Float,
                   AGE30_39: Float,
                   AGE40_49: Float,
                   AGE50_59: Float,
                   GAME: Float,
                   MOVIE: Float,
                   MUSIC: Float,
                   ART: Float,
                   POLITICS_NEWS: Float,
                   FINANCIAL: Float,
                   EDUCATION_TRAINING: Float,
                   HEALTH_CARE: Float,
                   TRAVEL: Float,
                   AUTOMOBILE: Float,
                   HOUSE_PROPERTY: Float,
                   CLOTHING_ACCESSORIES: Float,
                   BEAUTY: Float,
                   IT: Float,
                   BABY_PRODUCT: Float,
                   FOOD_SERVICE: Float,
                   HOME_FURNISHING: Float,
                   SPORTS: Float,
                   OUTDOOR_ACTIVITIES: Float,
                   MEDICINE: Float
                  )

/**
 * Spark job that reads the distinct `suuid` values of one app from the
 * `appstatistic` parquet data, attaches a set of tag weights to each of them
 * and writes the result to the `USER_TAGS` table through the Phoenix Spark
 * data source.
 *
 * The tag weights are produced by [[userTagInfo]], which draws them from
 * `math.random`; they are placeholder values, not derived from user data.
 */
object UserTagTable {

  // The logger used to be created for `UserOverviewFirst` (a copy/paste slip);
  // it now belongs to this object.
  val LOG = LoggerFactory.getLogger(getClass)

  val REP_HOME: String = s"${AppConfig.HDFS_MASTER}/${AppConfig.HDFS_REP}"

  /** First day (inclusive) of the range that `main` loads. */
  private val DefaultStartDay = "2016-04-10"

  /** JSON keys of the four independently drawn age buckets, in output order. */
  private val DrawnAgeKeys: Seq[String] =
    Seq("age10_19", "age20_29", "age30_39", "age40_49")

  /** JSON keys of the interest tags, in output order. */
  private val InterestKeys: Seq[String] = Seq(
    "game", "movie", "music", "art", "politics_news", "financial",
    "education_training", "health_care", "travel", "automobile",
    "house_property", "clothing_accessories", "beauty", "IT", "baby_Product",
    "food_service", "home_furnishing", "sports", "outdoor_activities", "medicine"
  )

  /**
   * Entry point.
   *
   * @param args `args(0)` is the appkey, `args(1)` is the last day (inclusive)
   *             to load. With fewer than two arguments the usage text is
   *             printed and no Spark context is started.
   */
  def main(args: Array[String]): Unit = {
    // Both arguments are read below, so require both of them up front instead
    // of failing later with an ArrayIndexOutOfBoundsException.
    if (args.length < 2) {
      println("请输入: appkey , StartTime : 2016-04-10 ,StartEnd :2016-04-11")
    } else {
      val appkey = args(0)
      val lastdate = args(1)
      val conf: com.typesafe.config.Config = ConfigFactory.load()
      val sc = new SparkContext()
      try {
        val sqlContext = new SQLContext(sc)

        loadDataFrame(sqlContext, appkey, DefaultStartDay, lastdate)
          .registerTempTable("suuidTable")
        sqlContext.udf.register("taginfo", (a: String) => userTagInfo(a))
        sqlContext.udf.register("intToString", (b: Long) => intToString(b))

        import sqlContext.implicits._

        // Key step: take each suuid from the temp table together with the JSON
        // produced by the UDF and turn the pair into a UserTag.
        // Null suuids are dropped here because the `suuid: String` pattern
        // below does not match null and would otherwise raise a MatchError.
        val tagged = sqlContext
          .sql(" select distinct(suuid) AS suuid,taginfo(suuid) from suuidTable" +
            " where suuid is not null group by suuid")
          .map { case Row(suuid: String, taginfo: String) => toUserTag(suuid, taginfo) }
          .toDF()
        tagged.registerTempTable("resultTable")

        // APPKEY and DATE are attached as literal columns rather than being
        // spliced into SQL text, so a quote character in either argument
        // cannot break the statement.
        val outputColumns =
          lit(appkey).as("APPKEY") +: lit(lastdate).as("DATE") +: tagged.columns.map(col)
        val resultDF = tagged
          .filter(col("SUUID").isNotNull)
          .select(outputColumns: _*)

        resultDF.write
          .mode(SaveMode.Overwrite)
          .options(Map("table" -> "USER_TAGS", "zkUrl" -> conf.getString("Hbase.url")))
          .format("org.apache.phoenix.spark")
          .save()
      } finally {
        // Release the Spark context whether the job succeeded or not.
        sc.stop()
      }
    }
  }

  /** Builds a [[UserTag]] from a user id and the JSON text returned by [[userTagInfo]]. */
  private def toUserTag(suuid: String, tagJson: String): UserTag = {
    val tags = JSON.parseObject(tagJson)

    def tag(key: String): Float = tags.getFloat(key)

    UserTag(
      suuid,
      tag("man"),
      tag("woman"),
      tag("age10_19"),
      tag("age20_29"),
      tag("age30_39"),
      tag("age40_49"),
      tag("age50_59"),
      tag("game"),
      tag("movie"),
      tag("music"),
      tag("art"),
      tag("politics_news"),
      tag("financial"),
      tag("education_training"),
      tag("health_care"),
      tag("travel"),
      tag("automobile"),
      tag("house_property"),
      tag("clothing_accessories"),
      tag("beauty"),
      tag("IT"),
      tag("baby_Product"),
      tag("food_service"),
      tag("home_furnishing"),
      tag("sports"),
      tag("outdoor_activities"),
      tag("medicine")
    )
  }

  /** Returns the decimal string form of `suuid`; registered as the `intToString` UDF. */
  def intToString(suuid: Long): String = suuid.toString

  /**
   * Produces a JSON object (as text) holding one randomly drawn weight per tag.
   *
   * `man`/`woman` sum to 1; the four drawn age buckets are each in [0, 0.2]
   * and `age50_59` is the remainder to 1; every interest tag is drawn
   * independently from [0, 1]. All values are rounded to two decimals.
   *
   * @param num1 ignored; present only so the function can be called as a
   *             one-argument UDF
   * @return JSON text of the form `{"man":0.37,"woman":0.63,...}`
   */
  def userTagInfo(num1: String): String = {
    // Fixed symbols: with the JVM default locale a comma decimal separator
    // ("0,37") would make `.toFloat` throw NumberFormatException.
    val twoDecimals = new DecimalFormat("0.00", DecimalFormatSymbols.getInstance(Locale.ROOT))

    def round2(value: Double): Float = twoDecimals.format(value).toFloat

    val man = round2(math.random)
    val woman = round2(1 - man)

    val drawnAges = DrawnAgeKeys.map(_ => round2(math.random * 0.2))
    val age50_59 = round2(drawnAges.foldLeft(1f)(_ - _))

    val interests = InterestKeys.map(_ => round2(math.random * 1))

    val entries: Seq[(String, Float)] =
      Seq("man" -> man, "woman" -> woman) ++
        DrawnAgeKeys.zip(drawnAges) ++
        Seq("age50_59" -> age50_59) ++
        InterestKeys.zip(interests)

    entries
      .map { case (key, value) => "\"" + key + "\":" + value }
      .mkString("{", ",", "}")
  }

  /**
   * Loads the `appstatistic` parquet data under [[REP_HOME]], restricted to
   * one app and an inclusive day range.
   *
   * @param ctx      SQL context used for reading
   * @param appkey   value the `appkey` column must equal
   * @param startDay lower bound (inclusive) for the `day` column
   * @param endDay   upper bound (inclusive) for the `day` column
   * @return rows whose `timestamp` is not null and that match the app and day range
   */
  def loadDataFrame(ctx: SQLContext, appkey: String, startDay: String, endDay: String): DataFrame = {
    val path = s"$REP_HOME/appstatistic"
    // Column expressions instead of an interpolated filter string: the
    // arguments are passed as literals and never parsed as SQL.
    ctx.read.parquet(path)
      .filter(
        col("timestamp").isNotNull &&
          (col("appkey") === appkey) &&
          (col("day") >= startDay) &&
          (col("day") <= endDay)
      )
  }
}
以上这篇《DataFrame:通过 Spark SQL 将 Scala 类转为 DataFrame 的方法》就是小编分享给大家的全部内容了,希望能给大家一个参考,也希望大家多多支持。
南强小屋 Design By 杰米
广告合作:本站广告合作请联系QQ:858582 申请时备注:广告合作(否则不回)
免责声明:本站文章均来自网站采集或用户投稿,网站不提供任何软件下载或自行开发的软件! 如有用户或公司发现本站内容信息存在侵权行为,请邮件告知! 858582#qq.com
免责声明:本站文章均来自网站采集或用户投稿,网站不提供任何软件下载或自行开发的软件! 如有用户或公司发现本站内容信息存在侵权行为,请邮件告知! 858582#qq.com
南强小屋 Design By 杰米
暂无DataFrame:通过SparkSql将scala类转为DataFrame的方法的评论...
《魔兽世界》大逃杀!60人新游玩模式《强袭风暴》3月21日上线
暴雪近日发布了《魔兽世界》10.2.6 更新内容,新游玩模式《强袭风暴》即将于3月21 日在亚服上线,届时玩家将前往阿拉希高地展开一场 60 人大逃杀对战。
艾泽拉斯的冒险者已经征服了艾泽拉斯的大地及遥远的彼岸。他们在对抗世界上最致命的敌人时展现出过人的手腕,并且成功阻止终结宇宙等级的威胁。当他们在为即将于《魔兽世界》资料片《地心之战》中来袭的萨拉塔斯势力做战斗准备时,他们还需要在熟悉的阿拉希高地面对一个全新的敌人──那就是彼此。在《巨龙崛起》10.2.6 更新的《强袭风暴》中,玩家将会进入一个全新的海盗主题大逃杀式限时活动,其中包含极高的风险和史诗级的奖励。
《强袭风暴》不是普通的战场,作为一个独立于主游戏之外的活动,玩家可以用大逃杀的风格来体验《魔兽世界》,不分职业、不分装备(除了你在赛局中捡到的),光是技巧和战略的强弱之分就能决定出谁才是能坚持到最后的赢家。本次活动将会开放单人和双人模式,玩家在加入海盗主题的预赛大厅区域前,可以从强袭风暴角色画面新增好友。游玩游戏将可以累计名望轨迹,《巨龙崛起》和《魔兽世界:巫妖王之怒 经典版》的玩家都可以获得奖励。