-- 存储过程 (Stored procedures)
-- Delete redundant rows from the shift-schedule history table
-- (class_order_config_history): among rows created today, keep only the
-- newest row per (producer_id, class_order) and delete the rest.
CREATE PROCEDURE p_del_coch()
BEGIN
-- The double derived-table wrapping (tmp, then tt) is the standard
-- workaround for MySQL's restriction on referencing the table being
-- deleted from inside a subquery of the same DELETE.
delete from class_order_config_history where id in (
SELECT id from (
SELECT * from (
-- rn = 1 marks the most recent row (by crt, descending) within each
-- (producer_id, class_order) group; rn >= 2 are the duplicates to drop.
SELECT row_number() over(PARTITION by a.producer_id,a.class_order ORDER BY a.crt desc) as rn,a.* from class_order_config_history a
-- Only rows created today; producer '2367963040512475140' is exempted.
-- NOTE(review): reason for that exemption is not visible here — confirm.
where DATE_FORMAT(now(),'%Y-%m-%d')=DATE_FORMAT(crt,'%Y-%m-%d') and producer_id not in ('2367963040512475140'))
tmp where tmp.rn>=2) tt );
END;
-- Weekly cleanup: run p_del_coch every week starting 2021-08-15 04:02.
-- ON COMPLETION PRESERVE keeps the event definition after each run.
-- NOTE(review): a second CREATE EVENT further down reuses the name
-- e_autoTotal; because of IF NOT EXISTS only this first one takes effect.
CREATE EVENT IF NOT EXISTS e_autoTotal
ON SCHEDULE EVERY 1 week STARTS '2021-08-15 04:02:00'
ON COMPLETION PRESERVE ENABLE
DO CALL p_del_coch();
-- Enable the scheduled event (开启定时任务).
-- Fixed: the trailing "//..." annotations were not valid MySQL comments;
-- text after the ';' would be parsed as a new (malformed) statement.
alter event second_event on completion preserve enable;
-- Disable the scheduled event (关闭定时任务).
alter event second_event on completion preserve disable;
-- Remove rows from t_1 whose output_add_m value is missing (NULL).
CREATE PROCEDURE p_del_tb1()
BEGIN
delete from t_1 where output_add_m is NULL;
END;
-- Hourly cleanup: call p_del_tb1 every hour starting 2021-08-15 04:02.
CREATE EVENT IF NOT EXISTS e_autoDel_tb1
ON SCHEDULE EVERY 1 HOUR STARTS '2021-08-15 04:02:00'
ON COMPLETION PRESERVE ENABLE
DO CALL p_del_tb1();
-- Also run the cleanup once immediately.
CALL p_del_tb1();
-- Monthly cleanup: run p_del_coch at 01:00 on the first day of the next
-- month (CURDATE minus DAY-1 days = first of this month; +1 MONTH; +1 HOUR),
-- then every month thereafter.
-- Fixed: this event was also named e_autoTotal, colliding with the weekly
-- event above; with IF NOT EXISTS this statement was a silent no-op.
-- Renamed so both events are actually created.
CREATE EVENT IF NOT EXISTS e_autoTotal_monthly
ON SCHEDULE EVERY 1 MONTH STARTS DATE_ADD(DATE_ADD(DATE_SUB(CURDATE(),INTERVAL DAY(CURDATE())-1 DAY),INTERVAL 1 MONTH),INTERVAL 1 HOUR)
ON COMPLETION PRESERVE ENABLE
DO CALL p_del_coch();
-- Demo event: truncate table demo_1119 every day at midnight.
-- ON COMPLETION PRESERVE keeps the event after each run; events are
-- ENABLEd by default when no state is specified.
-- Fixed: the statement was missing its terminating semicolon (the next
-- line in the file is free-form notes, not SQL).
CREATE EVENT demo_event5
ON SCHEDULE EVERY 1 DAY STARTS '2020-11-20 00:00:00'
ON COMPLETION PRESERVE
DO TRUNCATE table `demo_1119`;
已弃用 (Deprecated):
hs_spin.ods_wide_production
mvn assembly:assembly
mvn clean install -P test
D:/apache-maven-3.0.5
# Submit TestOfSparkContext2 to a local Spark master using 2 cores.
./bin/spark-submit \
--class com.imooc.spark.Test.TestOfSparkContext2 \
--master local[2] \
/home/hadoop/data/test-jar/sql-1.0.jar
/opt/spark/spark-2.1.1-bin-hadoop2.6/jars
npm i -g n --force
npm i -g n stable --force
export MANAGE_LOCAL_HBASE=true
export MANAGE_LOCAL_SOLR=true
set MANAGE_LOCAL_HBASE=true
set MANAGE_LOCAL_SOLR=true
jps
taskkill /pid 7132
taskkill /F /pid 112508
mvn clean -DskipTests package -Pdist,embedded-hbase-solr
mvn -Dmaven.test.skip=true clean package install -Prelease
cd dolphinscheduler-dist
mvn -Dmaven.test.skip=true package -Prelease
mvn -Dmaven.test.skip=true -Dmaven.javadoc.skip=true clean package install -Prelease
mvn -Dmaven.test.skip=true -Dmaven.javadoc.opts='-Xdoclint:-html' clean package install -Prelease
-Dfile.encoding=GB2312
current_timestamp as t_start_time,
'9999-12-30' AS t_end_time,'1' yw_status
java -classpath lib/* org.apache.dolphinscheduler.server.master.MasterServer
-Didea.maven3.use.compat.resolver
concat(r.producer_id,'_',r.host_id,'_',r.dev_group,'_',r.name,'_',r.class_order,'_',r.model_id,'_',r.variety,'_',substr(MIN(r.htime),1,10)) key,
./mvnw clean install -Prelease
yw_status: 数据业务状态 1代表有效,0代表无效,默认1
MySqlUtils.zip_host(spark, thisHour)
ods_wide_host_external
ods_min_yield_val
=weekday(a1,2)>5
=AND(A3<>"",WEEKDAY(A3,2)>5)
=AND(I3<>"",WEEKDAY(I3,2)>5)
datasource.properties
修改 dolphinscheduler-dao 模块的 datasource.properties
修改 dolphinscheduler-service 模块的 zookeeper.properties
#spring.datasource.driver-class-name=com.mysql.jdbc.Driver
spring.datasource.driver-class-name=com.mysql.cj.jdbc.Driver
spring.datasource.url=jdbc:mysql://127.0.0.1:3306/dolphinscheduler?useUnicode=true&characterEncoding=UTF-8&useSSL=false&serverTimezone=GMT
spring.datasource.username=root2
spring.datasource.password=root
-Dlogging.config=classpath:logback-master.xml -Ddruid.mysql.usePingMethod=false
{"serverTimezone":"GMT"}
test1/admin123456
install_config.conf
source:
hs_spin.ods_wide_host_increment
hs_spin.ods_min_yield_val
target:
dws.dws_hs_variety_shift
spark.sql(sqlStr).show(10, truncate = false)
-- Enable Hive dynamic partitioning so the INSERT below can route rows
-- into partitions derived from the selected dt value (nonstrict allows
-- all partition columns to be dynamic).
set hive.exec.dynamic.partition=true;
set hive.exec.dynamic.partition.mode=nonstrict;
set hive.exec.max.dynamic.partitions.pernode=1000;
-- Copy a 10-row sample into the partitioned table, reusing htime as the
-- dynamic partition key dt.
insert into TABLE dws.dws_ext_wide_production2 partition(dt)
SELECT key, name, host_id, producer_id, dev_group, variety, class_order, class_order_alias, htime, output_add_m, output_add_kg, efficiency, class_type, offline_time, crt,htime as dt
FROM dws.dws_ext_wide_production limit 10;
ALTER TABLE table_name ADD PARTITION (partition_name = 'value');
npm i node-sass --sass_binary_site=https://npm.taobao.org/mirrors/node-sass/
spark-shell --master spark://localhost:7077 --executor-memory 1g --total-executor-cores 2
spark-shell --master spark://hadoop360:8088 --executor-memory 1g --total-executor-cores 2
cd /app/spark/spark-2.2.0-bin-2.9.0/bin
./spark-shell --master local[3] --jars /app/mysql-connector-java-5.1.46.jar
bin/spark-shell --master spark://hadoop1:7077,hadoop2:7077 --executor-memory 1g --total-executor-cores 2 --driver-class-path /home/tuzq/software/spark-1.6.2-bin-hadoop2.6/lib/mysql-connector-java-5.1.38.jar
scan 'hs_spin:data_item_val',{STARTROW=>'1441341441',STOPROW=>'1441341441z'}
scan 'hs_spin:data_item_val',{STARTROW=>'1441598502',STOPROW=>'1441598502z'}
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>3.0.0</version>
</dependency>
val rows2: Array[Row] = frame2.collect()
for(row<-rows2){
//保存到redis
jedis.set(row.get(0).toString,row.get(1).toString)
}
<!-- https://mvnrepository.com/artifact/redis.clients/jedis -->
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>2.0.0</version>
</dependency>
//1.连接redis
val pool: JedisPool = new JedisPool(new GenericObjectPoolConfig, "qianfeng01", 6379)
val jedis: Jedis = pool.getResource
jedis.auth("123456")
//问题2.计算每个商品分类的成交量(结果保存到redis中)
val sql1=
"""
|select producer,sum(cast(price as int)) as sum
|from t1
|group by producer
|""".stripMargin
val frame1: DataFrame = spark.sql(sql1)
frame1.show()
val rows1: Array[Row] = frame1.collect()
for(b<-rows1){
//保存到redis
jedis.set(b.get(0).toString,b.get(1).toString)
}
<dependency>
<groupId>redis.clients</groupId>
<artifactId>jedis</artifactId>
<version>3.0.0</version>
</dependency>
ps -ef | grep atlas
echo stat|nc localhost 2181
spark.sql(" ").show(10,false)
create 'ns1:t1', 'f1'
create 'hs_spin:ods_wide_val_inc', 'cf'
create 'hs_spin:ods_wide_val_his', 'cf'
# Submit the MainCtl analysis job to YARN in cluster mode:
# 6 executors x 2 cores each (matching spark.default.parallelism=18),
# 5g per executor, 2g driver, on queue 'thequeue'.
/opt/spark/spark-2.1.1-bin-hadoop2.6/bin/spark-submit --class com.hangshu.impl.MainCtl \
--master yarn \
--deploy-mode cluster \
--num-executors 6 \
--driver-memory 2g \
--executor-memory 5g \
--executor-cores 2 \
--conf spark.default.parallelism=18 \
--queue thequeue \
/opt/spark_jar/dataAnalys-1.0.jar
jdbc.datasource.size: 10
jdbc.driver: com.mysql.jdbc.Driver
jdbc.url: jdbc:mysql://192.168.3.83:21020/hs_spin?user=root&password=hhss@*()2019&useUnicode=true&characterEncoding=utf8&allowMultiQueries=true&autoReconnect=true&failOverReadOnly=false&rewriteBatchedStatements=true
jdbc.user: Hsapi
jdbc.password: Hsapi-2016
hive.metastore.uris: thrift://hadoop361:9083
spark.sql.warehouse.dir: /user/hive/warehouse
hbase.zookeeper.quorum: hadoop362,hadoop363,hadoop364
lzdmiscrt: true
hs.run: false
.replace("$condition", condition)
condition: StringBuilder
where 1=1 $condition
from_unixtime(unix_timestamp(t3.end_time)+3600*2)
from_unixtime(unix_timestamp('$dateHour')-3600*2)
$condition
@transient lazy val log = Logger.getLogger(this.getClass)
log.info("过滤条件===》"+condition)
MyConfigRecovery.run(spark, mySqlConfig, DataTableConstants.ODS_MIN_YIELD_VAL, params._2,condition)
DateUtils.getlastdaytime(dateHour,-1,DateUtils.DATE_FORMAT)
spark.sql(sqlStr).show(10)
spark.sql(sqlStr2).show()
Hbase清除表数据
1.清空表数据 会关闭划分好的region
truncate '表名'
2.只清空数据,保留region划分
truncate_preserve '表名'
truncate_preserve 'hs_spin:ods_ext_item_origin'
describe 'hs_spin:ods_ext_item_origin'
count 'hs_spin:ods_ext_item_origin'
dwd层是数据明细层, 存储事实表的明细数据和维护维度表, 并把事实表和维度表做join, 把维度冗余到事实表中.