分类目录归档:Linux开发

Linux开发:凡是在Linux环境下进行的相关开发都归入此类,包括C++、Go、Web、shell命令等。

pika编译及运行

1.基于https://github.com/Qihoo360/pika/的v3.1.1版本,定制出适合业务要求的功能改进版。
https://github.com/kxtry/pika是在v3.1.1基础上,新增了incrbyrange(key,val,min, max)及hincrbyrange(key,val,min, max)这两个命令。
2.官方提供的编译是直接基于docker的编译。

编译Dockerfile文件。
docker build -t pika .

3.提取编译后结果,也可以直接用该容器运行应用

运行应用:
docker run pika:latest bash -c "./bin/pika -c ./conf/pika.conf"
也可提取相关应用至宿主机运行。
docker cp 容器名:/pika/output ./  #docker cp ce4541cc4627:/pika/output ./

4.提取出来的应用,在宿主机上运行,需要安装相关依赖。

安装epel源。
rpm -ivh https://mirrors.ustc.edu.cn/epel/epel-release-latest-7.noarch.rpm
安装glog和protobuf的动态连接库。
sudo yum install -y glog protobuf  #编译时,对应的是glog-devel 和protobuf-devel
如果仍然无法运行,则执行strace ./pika或ldd pika来检查缺少哪些动态库。

5. 运行:

docker run pika:latest bash -c "./bin/pika -c ./conf/pika.conf"

6. 运行脚本run-app.sh

#!/bin/sh
#
# run-app.sh - install, start or stop the pika server that lives next to
# this script (expects ./bin/pika and ./pika.conf.template beside it).
#
# Usage: run-app.sh {install|start|stop}
#   install  install the runtime libraries once and render conf/pika.conf
#            from pika.conf.template
#   start    start pika unless a pika process is already running
#   stop     send signal 3 (SIGQUIT) to every running pika process
#
# Keep-alive through cron:
#   crontab -e
#   */1 * * * * sh /data/scripts/run-app.sh start

path_current=$(pwd)
path_script=$(cd "$(dirname "$0")" && pwd) || exit 1
path_data=$path_script/data
logfile=$path_data/check.log
mode=$1

name=pika

# Print one "PID ARGS" line for every process whose executable (argv[0])
# has the basename $name. Comparing the executable name instead of grepping
# the whole command line keeps this script, editors or tail commands whose
# arguments merely contain "pika" out of the result.
find_app_process() {
   ps -e -o pid= -o args= | awk -v name="$name" '{
      n = split($2, parts, "/")
      if (parts[n] == name) print
   }'
}

app_process=$(find_app_process)

# The log file lives in the data directory, so create it before logging.
if [ ! -d "$path_data" ]; then
   mkdir -p "$path_data" || exit 1
fi

date >> "$logfile"
echo "ready to check...." >> "$logfile"
case "$mode" in
   'install')
      # .envok marks that the runtime libraries were installed successfully.
      if [ ! -f "$path_script/.envok" ]; then
         rpm -ivh https://mirrors.ustc.edu.cn/epel/epel-release-latest-7.noarch.rpm
         yum install -y glog protobuf && touch "$path_script/.envok"
      fi
      # Render the config only once; an existing pika.conf is never overwritten.
      if [ ! -f "$path_script/conf/pika.conf" ]; then
         mkdir -p "$path_data" "$path_script/conf" \
            && /bin/cp -rf "$path_script/pika.conf.template" "$path_script/conf/pika.conf" \
            && /bin/sed -i "s#{{path_current}}#$path_data#g" "$path_script/conf/pika.conf"
      fi
      ;;
   'start')
      echo "$app_process" >> "$logfile"
      echo "it's ready to start op...."
      if [ -n "$app_process" ]; then
         echo ""
         echo "$app_process"
         echo ""
      else
         # Start from the script directory so relative paths resolve there.
         cd "$path_script" || exit 1
         nohup "$path_script/bin/$name" -c "$path_script/conf/${name}.conf" > "$path_data/info.txt" 2>&1 &
         echo "success to restart $name" >> "$logfile"
         cd "$path_current"
      fi
      echo 'success to start.'
      ;;
   'stop')
      echo "it's ready to check process..."
      if [ -n "$app_process" ]; then
         echo "had find app process informaton"
         # One PID per line, so every matching process is signalled.
         echo "$app_process" | awk '{print $1}' | xargs kill -3
      fi
      echo 'success to kill.'
      ;;
   *)
      basename=$(basename "$0")
      echo "Usage: $basename  {install|start|stop}  [ server options ]"
      exit 1
      ;;
esac
# Reaching this point means the requested mode was handled.
exit 0

7.原默认配置脚本pika.conf.template

# Pika port
port : 9221
# Thread Number
thread-num : 50
# Thread Pool Size
thread-pool-size : 100
# Sync Thread Number
sync-thread-num : 10
# Pika log path
log-path : {{path_current}}/log/
# Pika db path
db-path : {{path_current}}/db/
# Pika write-buffer-size
write-buffer-size : 268435456
# Pika timeout
timeout : 60
# Requirepass
requirepass : abc123
# Masterauth
masterauth : abc123
# Userpass
userpass : abc123
# User Blacklist
userblacklist :
# if this option is set to 'classic', that means pika support multiple DB, in
# this mode, option databases enable
# if this option is set to 'sharding', that means pika support multiple Table, you
# can specify partition num for each table, in this mode, option table-list enable
# Pika instance mode [classic | sharding]
instance-mode : classic
# Set the number of databases. The default database is DB 0, you can select
# a different one on a per-connection basis using SELECT <dbid> where
# dbid is a number between 0 and 'databases' - 1, limited in [1, 8]
databases : 1
# Table list
table-list : table1:1,table2:1
# Dump Prefix
dump-prefix :
# daemonize  [yes | no]
daemonize : yes
# Dump Path
dump-path : {{path_current}}/dump/
# Expire-dump-days
dump-expire : 0
# pidfile Path
pidfile : {{path_current}}/pika.pid
# Max Connection
maxclients : 20000
# the per file size of sst to compact, default is 2M (the value set below is 20M)
target-file-size-base : 20971520
# Expire-logs-days
expire-logs-days : 7
# Expire-logs-nums
expire-logs-nums : 10
# Root-connection-num
root-connection-num : 2
# Slowlog-write-errorlog
slowlog-write-errorlog : no
# Slowlog-log-slower-than
slowlog-log-slower-than : 10000
# Slowlog-max-len
slowlog-max-len : 128
# Pika db sync path
db-sync-path : {{path_current}}/dbsync/
# db sync speed(MB) max is set to 1024MB, min is set to 0, and if below 0 or above 1024, the value will be adjusted to 1024
db-sync-speed : -1
# The slave priority
slave-priority : 100
# network interface
#network-interface : eth1
# replication
#slaveof : master-ip:master-port
 
# CronTask, format 1: start-end/ratio, like 02-04/60, pika will check to schedule compaction between 2 to 4 o'clock everyday
#                   if the freesize/disksize > 60%.
#           format 2: week/start-end/ratio, like 3/02-04/60, pika will check to schedule compaction between 2 to 4 o'clock
#                   every wednesday, if the freesize/disksize > 60%.
#           NOTICE: if compact-interval is set, compact-cron will be masked and disabled.
#
#compact-cron : 3/02-04/60
 
# Compact-interval, format: interval/ratio, like 6/60, pika will check to schedule compaction every 6 hours,
#                           if the freesize/disksize > 60%. NOTICE: compact-interval takes priority over compact-cron;
#compact-interval :
 
# server-id for hub
server-id : 1
 
###################
## Critical Settings
###################
# write_binlog  [yes | no]
write-binlog : yes
# binlog file size: default is 100M,  limited in [1K, 2G]
binlog-file-size : 104857600
# Automatically triggers a small compaction according to statistics
# Use the cache to store up to 'max-cache-statistic-keys' keys
# if 'max-cache-statistic-keys' set to '0', that means turn off the statistics function
# it also doesn't automatically trigger a small compact feature
max-cache-statistic-keys : 0
# When 'delete' or 'overwrite' a specific multi-data structure key 'small-compaction-threshold' times,
# a small compact is triggered automatically, default is 5000, limited in [1, 100000]
small-compaction-threshold : 5000
# If the total size of all live memtables of all the DBs exceeds
# the limit, a flush will be triggered in the next DB to which the next write
# is issued.
max-write-buffer-size :  10737418240
# Compression
compression : snappy
# max-background-flushes: default is 1, limited in [1, 4]
max-background-flushes : 1
# max-background-compactions: default is 2, limited in [1, 8]
max-background-compactions : 2
# max-cache-files default is 5000
max-cache-files : 5000
# max_bytes_for_level_multiplier: default is 10, you can change it to 5
max-bytes-for-level-multiplier : 10
# BlockBasedTable block_size, default 4k
# block-size: 4096
# block LRU cache, default 8M, 0 to disable
# block-cache: 8388608
# whether the block cache is shared among the RocksDB instances, default is per CF
# share-block-cache: no
# whether or not index and filter blocks is stored in block cache
# cache-index-and-filter-blocks: no
# when set to yes, bloomfilter of the last level will not be built
# optimize-filters-for-hits: no
# https://github.com/facebook/rocksdb/wiki/Leveled-Compaction#levels-target-size
# level-compaction-dynamic-level-bytes: no

8.目录结构如下:

Top
 |--->bin
 |--->conf
 |--->tool
pika.conf.template
run-app.sh

sparksql的操作实践

KMR
1.登录KMR
2.切换至spark帐号【su - spark】
3.进入spark-shell的命令行操作界面

spark-shell --master=yarn

4.常见命令如下

// Create an external, partitioned table over existing HDFS data.
spark.sql("create external table bhabc(`userid` bigint,`id` int,`date` string,`count` bigint,`opcnt` int,`start` int,`end` int) partitioned by (dt string) row format delimited fields terminated by ','  stored as sequencefile location '/data/behavior/bh_abc_dev'").show
spark.sql("show tables").show
spark.sql("show databases").show
spark.sql("show tables").show
spark.sql("show partitions bhwps").show
spark.sql("alter table bhwps add partition(dt='2019-05-21')").show
spark.sql("select * from bhwps where dt between '2019-05-15' and '2019-05-31' order by `count` desc").show
// Add several partitions in one statement.
spark.sql("alter table bhwps add partition(dt='2019-06-22') partition(dt='2019-06-23')").show
// Repairing the table re-synchronises its partition information with what is on HDFS.
spark.sql("msck repair table bhwps").show
// show(100,false) displays more than the default 20 rows.
spark.sql("show partitions bhraw").show(100,false)

5.常见问题:
》目录权限问题
可以用hdfs dfs -chown -R 用户:组 /path来修改目录的属主(递归参数是大写的-R,且必须指定新的属主)。

清理垃圾桶
hdfs dfs -expunge

普通用户也可重启nginx

1.修改nginx.conf为abc:root启动

  user  abc abc;  #以普通用户启,但实际上是master为root,worker进程是abc。
或 
  user abc root;

2.修改nginx

chown root nginx #nginx必须是root用户拥有者。
chmod a+s nginx  #把x属性改为s属性。
ll nginx
-rwsr-sr-x 1 root root 2030760 Mar  5 11:52 nginx

七牛日志分析

七牛日志格式

116.231.10.133 HIT 0 [07/May/2019:09:04:56 +0800] "GET http://wdl1.cache.wps.cn/per-plugin/dl/addons/pool/win-i386/wpsminisite_3.0.0.37.7z HTTP/1.1" 206 66372 "-" "-"

Qt的相关代码

// Extracts the transferred byte count from one access-log line.
//
// The line is expected to contain `HTTP/1.1" <status> <bytes> ...`; the value
// returned is the second space-separated field after the `HTTP/1.1"` marker.
// Returns 0 when the marker is missing (or at position 0), when the line is
// truncated before the byte count, or when that field is not a number.
int getDownloadSize(QString line) {
    const int idx = line.indexOf("HTTP/1.1\"");
    if (idx <= 0) {
        return 0;
    }
    // Fields from the marker on: [0] HTTP/1.1"  [1] status  [2] bytes
    const QStringList sets = line.mid(idx).split(" ");
    if (sets.size() < 3) {
        // Truncated line: QList::at() must not be called out of range.
        return 0;
    }
    return sets.at(2).toInt();
}
 
 
int main(int argc, char *argv[])
{
    QCoreApplication a(argc, argv);
 
    QString dpath("F:\\日志0508\\18");
 
    QDir d(dpath);
    QStringList fs = d.entryList();
    qint64 plug_total = 1;
    qint64 other_total = 1;
    for(int i = 0; i < fs.size(); i++) {
        QString f = fs.at(i);
        QFile file(dpath+"\\"+f);
        if(!file.open(QIODevice::ReadOnly|QIODevice::Text)) {
            continue;
        }
        int linecnt = 0;
        while(!file.atEnd()) {
            QByteArray line = file.readLine();
            if(line.indexOf("wdl1.cache.wps.cn") < 0) {
                continue;
            }
            int idx = line.indexOf("win-i386");
            int size = getDownloadSize(line);
            qint64 skidx = file.pos();
            if(idx > 0) {
                plug_total += size;
            }else{
                other_total += size;
            }
            linecnt++;
            if(linecnt % 10000 == 0) {
                printf("\r\nlinecnt:%d - skidx:%lld - plug_total:%lld - other_total:%lld - ratio:%5f", linecnt, skidx, plug_total, other_total, double(plug_total) / double(other_total));
            }
        }
        printf("\r\nlinecnt:%d - plug_total:%lld - other_total:%lld - ratio:%5f", linecnt, plug_total, other_total, double(plug_total) / double(other_total));
    }
    printf("\r\nplug_total:%lld - other_total:%lld - ratio:%5f", plug_total, other_total, double(plug_total) / double(other_total));
    return a.exec();
}

获取响应内容

# Access-log format "cntr": one JSON object per request, built with
# escape=json. bodybytessend, status and requesttime are written unquoted;
# every other field is written as a quoted string.
# respbody and respdown are taken from the resp_body and resp_ctrl_down
# variables, which this format does not define itself: a location that logs
# with this format has to set them.
log_format cntr escape=json '{"time":"$time_local",'
                        '"@source":"$server_addr",'
                        '"hostname":"$hostname",'
                        '"xforward":"$http_x_forwarded_for",'
                        '"remoteaddr":"$remote_addr",'
                        '"method":"$request_method",'
                        '"scheme":"$scheme",'
                        '"domain":"$server_name",'
                        '"referer":"$http_referer",'
                        '"requrl":"$request_uri",'
                        '"args":"$args",'
                        '"requestbody":"$request_body",'
                        '"contentlength":"$content_length",'
                        '"bodybytessend":$body_bytes_sent,'
                        '"status":$status,'
                        '"requesttime":$request_time,'
                        '"upstreamtime":"$upstream_response_time",'
                        '"upstreamaddr":"$upstream_addr",'
                        '"respbody":"$resp_body",'
                        '"respdown":"$resp_ctrl_down",'
                        '"useragent":"$http_user_agent"'
                        '}';
# Proxies /abc to http://abc/flow and logs each request in the "cntr" format,
# including the response body captured by the Lua body filter below.
location /abc {
        #access_log off;
        # Buffered access log: written through a 32k buffer, flushed every 5s.
        access_log /data/logs/nginx/abc/access.log cntr buffer=32k flush=5s;
        error_log /data/logs/nginx/abc/error.log info;
 
        # Initial values for the two variables the Lua filter fills in;
        # resp_ctrl_down stays -1 if the filter never sees the final chunk.
        set $resp_ctrl_down -1;
        set $resp_body "";
        # Only add the Lua block below on machines whose logs are collected by ELK.
        # The filter reads the chunk and the eof flag from ngx.arg[1] and
        # ngx.arg[2] and appends each chunk to resp_body. On eof it sets
        # resp_ctrl_down to 1 when the body contains the text "count":true,
        # otherwise to 0.
        # NOTE(review): the whole response body is accumulated in the variable
        # with no size limit - confirm this is acceptable for large responses.
        body_filter_by_lua_block {
            local chunk, eof = ngx.arg[1], ngx.arg[2]
            ngx.var.resp_body=ngx.var.resp_body..chunk
            if eof then
               if string.find(ngx.var.resp_body,'"count":true') then
                  ngx.var.resp_ctrl_down = 1
               else
                  ngx.var.resp_ctrl_down = 0
               end
            end
        }
 
        proxy_pass http://abc/flow;
    }

fedora换阿里云镜像源

# Switch Fedora's package repositories to the Aliyun mirror.
# Open a root login shell; the files under /etc/yum.repos.d/ are replaced below.
su -
cd /etc/yum.repos.d/
# Keep the stock repo definitions as .backup copies.
mv fedora.repo fedora.repo.backup
mv fedora-updates.repo fedora-updates.repo.backup
# Download the replacement repo definitions from mirrors.aliyun.com.
wget -O /etc/yum.repos.d/fedora.repo http://mirrors.aliyun.com/repo/fedora.repo
wget -O /etc/yum.repos.d/fedora-updates.repo http://mirrors.aliyun.com/repo/fedora-updates.repo
# Drop the cached metadata, then rebuild it from the new repositories.
dnf clean all
dnf makecache

自动拉起服务脚本

crontab -e
*/1 * * * * sh /data/scripts/run-flow.sh start
#!/bin/sh
#
# run-flow.sh - start or stop flowservice.
#
# Usage: run-flow.sh {start|stop}
#   start  start flowservice unless it is already running
#   stop   ask every flowservice process to terminate, then force-kill
#          whatever is still alive after a short grace period
#
# Keep-alive through cron:
#   */1 * * * * sh /data/scripts/run-flow.sh start

path_current=$(pwd)
path_script=$(cd "$(dirname "$0")" && pwd) || exit 1
mode=$1

name=flowservice
app_home=/data/code/service.flow.wps.cn
logfile=/data/scripts/check.log

# Print one "PID ARGS" line for every process whose executable (argv[0])
# has the basename $name. Comparing the executable name instead of grepping
# the whole command line keeps unrelated processes whose arguments merely
# contain "flowservice" (editors, tail, ...) out of the result.
find_app_process() {
   ps -e -o pid= -o args= | awk -v name="$name" '{
      n = split($2, parts, "/")
      if (parts[n] == name) print
   }'
}

app_process=$(find_app_process)
date >> "$logfile"
echo "ready to check...." >> "$logfile"
case "$mode" in
   'start')
        echo "$app_process" >> "$logfile"
        echo "it's ready to start op...."
        if [ -n "$app_process" ]; then
                echo ""
                echo "$app_process"
                echo ""
        else
                # Run from the script's directory; the original note says this
                # is what makes the springboot config directory take effect.
                cd "$path_script" || exit 1
                nohup "$app_home/$name" --config="$app_home/config/config-prod.toml" > "$app_home/info.txt" 2>&1 &
                echo "success to restart flowservice" >> "$logfile"
                cd "$path_current"
        fi

        echo 'success to start.'
        ;;
   'stop')
        echo "it's ready to check process..."
        if [ -n "$app_process" ]; then
                echo "had find app process informaton"
                # Ask politely first (SIGTERM) so the service can shut down cleanly.
                echo "$app_process" | awk '{print $1}' | xargs kill
                # Give it up to 10 seconds to exit.
                waited=0
                while [ "$waited" -lt 10 ] && [ -n "$(find_app_process)" ]; do
                        sleep 1
                        waited=$((waited + 1))
                done
                # Force-kill anything that ignored SIGTERM.
                remaining=$(find_app_process | awk '{print $1}')
                if [ -n "$remaining" ]; then
                        echo "$remaining" | xargs kill -9
                fi
        fi
        echo 'success to kill.'
        ;;
    *)
        basename=$(basename "$0")
        echo "Usage: $basename  {start|stop}  [ server options ]"
        exit 1
        ;;
esac
# Reaching this point means the requested mode was handled.
exit 0