Category Archives: docker

Containerized Yii2 service deployment for woweb

version: '3.1'
 
networks:
  default:
    driver: bridge
    driver_opts:
      com.docker.network.enable_ipv6: "false"
    ipam:
      driver: default
      config:
        - subnet: 192.168.57.0/24
 
services:
  mysql:
    image: mysql:5.5.60
    restart: always
    environment:
      MYSQL_ROOT_PASSWORD: abc3.0123
    # links:
    ports:
      - 13306:3306
      # Port 13306 is used by other parties; do not change it arbitrarily.
    volumes:
      - ./data/share/localtime:/etc/localtime:ro
      - ./data/share/timezone:/etc/timezone:ro
      - ./data/mysql/data:/var/lib/mysql
 
  php:
    image: yiisoftware/yii2-php:7.2-fpm
    restart: always
    ports:
      - 9900:9000
    links:
      - mysql:mysql
    extra_hosts:
      - mysql.woterm.com:abc.24.129.221
    depends_on:
      - mysql
    volumes:
      - ./data/share/localtime:/etc/localtime:ro
      - ./data/share/timezone:/etc/timezone:ro
      - ./data/wwwroot:/home/wwwroot
      - ./data/wwwlogs:/home/wwwlogs
      - ./../../woweb:/home/wwwroot/woweb
    # php-fpm runs as user www-data, so wwwroot must be made writable (chmod a+w).
 
  nginx:
    image: nginx:1.13.6
    restart: always
    ports:
      - 80:80
    links:
      - mysql
      - php
    depends_on:
      - mysql
      - php
    volumes:
      - ./data/share/localtime:/etc/localtime:ro
      - ./data/share/timezone:/etc/timezone:ro
      - ./data/nginx/conf/nginx.conf:/etc/nginx/nginx.conf:ro
      - ./data/nginx/conf/vhost:/etc/nginx/vhost:ro
      - ./data/wwwroot:/home/wwwroot
      - ./data/wwwlogs:/home/wwwlogs
      - ./../../woweb:/home/wwwroot/woweb
 
  ftp:
    image: stilliard/pure-ftpd
    restart: always
    ports:
      - "21:21"
    volumes:
      - ./data/vsftp:/home/vsftp
    environment:
      FTP_USER_NAME: uftp
      FTP_USER_PASS: xxxxxxx
      FTP_USER_HOME: ./data/vsftp/home
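
To bring the stack up and apply the permission note from the php service above, something like the following should work (a minimal sketch, assuming it is run from the directory containing this compose file):

docker-compose up -d
docker-compose ps
# php-fpm runs as www-data, so the mounted web root must be writable
chmod -R a+w ./data/wwwroot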

The relationship between CMD and ENTRYPOINT in a Dockerfile

The following example illustrates the relationship between CMD and ENTRYPOINT in this (exec) form:
CMD supplies the arguments to the application, while ENTRYPOINT acts as the application's main entry point.

FROM centos 
CMD ["echo 'p222 in cmd'"]     #传递给ENTRYPOINT的参数项。
ENTRYPOINT ["echo"]     #应用入口,相当于程序的main函数

1. Build the image

docker build -t test .

2. Run the following command to execute the default CMD:
docker run test
Output:
echo 'p222 in cmd'

3. Override the program's arguments

docker run test abct123
Output:
abct123
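
The ENTRYPOINT itself can also be replaced at run time with docker's --entrypoint flag; as an illustration (the /bin/ls path is only an example):

docker run --entrypoint /bin/ls test -l /
Here the trailing arguments (-l /) are passed to /bin/ls instead of echo.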

Containerized Kafka deployment

1. Reference implementation: https://github.com/wurstmeister/kafka-docker
2. Reference: https://github.com/simplesteph/kafka-stack-docker-compose
3. Based on the two references above, the deployment file below was put together.

version: '3.1'
 
services:
  zoo1:
    image: zookeeper:3.4.9
    hostname: zoo1
    ports:
      - "2181:2181"
    environment:
        ZOO_MY_ID: 1
        ZOO_PORT: 2181
        ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
    volumes:
      - ./zk-multiple-kafka-multiple/zoo1/data:/data
      - ./zk-multiple-kafka-multiple/zoo1/datalog:/datalog
 
  zoo2:
    image: zookeeper:3.4.9
    hostname: zoo2
    ports:
      - "2182:2182"
    environment:
        ZOO_MY_ID: 2
        ZOO_PORT: 2182
        ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
    volumes:
      - ./zk-multiple-kafka-multiple/zoo2/data:/data
      - ./zk-multiple-kafka-multiple/zoo2/datalog:/datalog
 
  zoo3:
    image: zookeeper:3.4.9
    hostname: zoo3
    ports:
      - "2183:2183"
    environment:
        ZOO_MY_ID: 3
        ZOO_PORT: 2183
        ZOO_SERVERS: server.1=zoo1:2888:3888 server.2=zoo2:2888:3888 server.3=zoo3:2888:3888
    volumes:
      - ./zk-multiple-kafka-multiple/zoo3/data:/data
      - ./zk-multiple-kafka-multiple/zoo3/datalog:/datalog
 
 
  kafka1:
    image: wurstmeister/kafka:2.12-2.0.1
    container_name: kafka1
    hostname: kafka1
    ports:
      - "9092:9092"
      - "1099:1099"
    environment:
      KAFKA_ZOOKEEPER_CONNECT: "zoo1:2181,zoo2:2182,zoo3:2183"
      KAFKA_BROKER_ID: 1
      KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_LISTENERS: PLAINTEXT://:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://192.168.10.100:9092
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 3
      KAFKA_DEFAULT_REPLICATION_FACTOR: 3
      KAFKA_JMX_OPTS: "-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Djava.rmi.server.hostname=127.0.0.1 -Dcom.sun.management.jmxremote.rmi.port=1099"
      JMX_PORT: 1099
    volumes:
      - ./zk-multiple-kafka-multiple/kafka1:/kafka
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      - zoo1
      - zoo2
      - zoo3
 
  kafka2:
    image: wurstmeister/kafka:2.12-2.0.1
    container_name: kafka2
    hostname: kafka2
    ports:
      - "9093:9092"
      - "2099:1099"
    environment:
      KAFKA_ZOOKEEPER_CONNECT: "zoo1:2181,zoo2:2182,zoo3:2183"
      KAFKA_BROKER_ID: 2
      KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_LISTENERS: PLAINTEXT://:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://192.168.10.100:9093
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 3
      KAFKA_DEFAULT_REPLICATION_FACTOR: 3
      KAFKA_JMX_OPTS: "-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Djava.rmi.server.hostname=127.0.0.1 -Dcom.sun.management.jmxremote.rmi.port=1099"
      JMX_PORT: 1099
    volumes:
      - ./zk-multiple-kafka-multiple/kafka2:/kafka
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      - zoo1
      - zoo2
      - zoo3
 
  kafka3:
    image: wurstmeister/kafka:2.12-2.0.1
    container_name: kafka3
    hostname: kafka3
    ports:
      - "9094:9092"
      - "3099:1099"
    environment:
      KAFKA_ZOOKEEPER_CONNECT: "zoo1:2181,zoo2:2182,zoo3:2183"
      KAFKA_BROKER_ID: 3
      KAFKA_LOG4J_LOGGERS: "kafka.controller=INFO,kafka.producer.async.DefaultEventHandler=INFO,state.change.logger=INFO"
      KAFKA_AUTO_CREATE_TOPICS_ENABLE: "true"
      KAFKA_LISTENERS: PLAINTEXT://:9092
      KAFKA_ADVERTISED_LISTENERS: PLAINTEXT://192.168.10.100:9094
      KAFKA_OFFSETS_TOPIC_REPLICATION_FACTOR: 3
      KAFKA_DEFAULT_REPLICATION_FACTOR: 3
      KAFKA_JMX_OPTS: "-Dcom.sun.management.jmxremote -Dcom.sun.management.jmxremote.authenticate=false -Dcom.sun.management.jmxremote.ssl=false -Djava.rmi.server.hostname=127.0.0.1 -Dcom.sun.management.jmxremote.rmi.port=1099"
      JMX_PORT: 1099
    volumes:
      - ./zk-multiple-kafka-multiple/kafka3:/kafka
      - /var/run/docker.sock:/var/run/docker.sock
    depends_on:
      - zoo1
      - zoo2
      - zoo3
 
  manager:
    image: hlebalbau/kafka-manager:2.0.0.2
    hostname: manager
    ports:
      - "9000:9000"
    environment:
      ZK_HOSTS: "zoo1:2181,zoo2:2182,zoo3:2183"
      APPLICATION_SECRET: "random-secret"
      KAFKA_MANAGER_AUTH_ENABLED: "true"
      KAFKA_MANAGER_USERNAME: "abc"
      KAFKA_MANAGER_PASSWORD: "123"
    command: -Dpidfile.path=/dev/null
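
To sanity-check the brokers, the Kafka CLI tools can be run inside one of the containers. This is only a sketch: it assumes kafka-topics.sh is on the PATH of the wurstmeister image, and JMX_PORT has to be unset first so the CLI does not collide with the broker's JMX port.

docker-compose up -d
docker exec -it kafka1 bash -c "unset JMX_PORT; kafka-topics.sh --zookeeper zoo1:2181 --create --topic my-topic --partitions 3 --replication-factor 3"
docker exec -it kafka1 bash -c "unset JMX_PORT; kafka-topics.sh --zookeeper zoo1:2181 --list"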

4. Test code
Based on the examples from the https://github.com/segmentio/kafka-go library, implemented as follows:

package kaf
 
import (
	"context"
	"fmt"
	"github.com/segmentio/kafka-go"
	"log"
	"time"
)
 
func LeaderProduce() {
	topic := "my-topic"
	partition := 0
 
	conn, err := kafka.DialLeader(context.Background(), "tcp", "localhost:9092", topic, partition)
	if err != nil {
		log.Fatal(err)
	}
	conn.SetWriteDeadline(time.Now().Add(10 * time.Second))
	conn.WriteMessages(
		kafka.Message{Value: []byte(fmt.Sprint("one!", time.Now()))},
		kafka.Message{Value: []byte(fmt.Sprint("two!", time.Now()))},
		kafka.Message{Value: []byte(fmt.Sprint("three!", time.Now()))},
	)
 
	conn.Close()
}
 
func LeaderConsumer() {
	topic := "my-topic"
	partition := 0
 
	conn, err := kafka.DialLeader(context.Background(), "tcp", "localhost:9092", topic, partition)
	if err != nil {
		log.Fatal(err)
	}
 
	conn.SetReadDeadline(time.Now().Add(10 * time.Second))
	batch := conn.ReadBatch(10e3, 1e6) // fetch 10KB min, 1MB max
	for {
		msg, err := batch.ReadMessage()
		if err != nil {
			break
		}
		fmt.Println(string(msg.Value))
	}
 
	batch.Close()
	conn.Close()
}
 
func ClusterProduce(port int) {
	// make a writer that produces to topic-A, using the least-bytes distribution
	w := kafka.NewWriter(kafka.WriterConfig{
		Brokers:  []string{"localhost:9092", "localhost:9093", "localhost:9094"},
		Topic:    "topic-A",
		Balancer: &kafka.LeastBytes{},
	})
 
	err := w.WriteMessages(context.Background(),
		kafka.Message{
			Key:   []byte("Key-A"),
			Value: []byte(fmt.Sprint("Hello World!", time.Now())),
		},
		kafka.Message{
			Key:   []byte("Key-B"),
			Value: []byte(fmt.Sprint("One!", time.Now())),
		},
	)
	if err != nil {
		fmt.Println(port, "error", err)
	}
 
	w.Close()
}
 
func clusterConsume(port int) {
	// make a new reader that consumes from topic-A
	r := kafka.NewReader(kafka.ReaderConfig{
		Brokers:  []string{"localhost:9092", "localhost:9093", "localhost:9094"},
		GroupID:  "consumer-group-id",
		Topic:    "topic-A",
		MinBytes: 1024 * 10, // 10KB
		MaxBytes: 10e6,      // 10MB
	})
 
	for {
		m, err := r.ReadMessage(context.Background())
		if err != nil {
			fmt.Println(port, "error.....", err)
			time.Sleep(time.Second * 10)
			continue
		}
		fmt.Printf("%v--message at topic/partition/offset %v/%v/%v: %s = %s\n", port, m.Topic, m.Partition, m.Offset, string(m.Key), string(m.Value))
		// time.Sleep(time.Second)
	}
 
	r.Close()
}

Containerized ZooKeeper configuration

docker pull zookeeper
https://github.com/getwingm/kafka-stack-docker-compose

version: '3.1'
 
services:
  zoo1:
    image: zookeeper
    restart: always
    hostname: zoo1
    ports:
      - 2181:2181
    environment:
      ZOO_MY_ID: 1
      ZOO_SERVERS: server.1=0.0.0.0:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=zoo3:2888:3888;2181
 
  zoo2:
    image: zookeeper
    restart: always
    hostname: zoo2
    ports:
      - 2182:2181
    environment:
      ZOO_MY_ID: 2
      ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=0.0.0.0:2888:3888;2181 server.3=zoo3:2888:3888;2181
 
  zoo3:
    image: zookeeper
    restart: always
    hostname: zoo3
    ports:
      - 2183:2181
    environment:
      ZOO_MY_ID: 3
      ZOO_SERVERS: server.1=zoo1:2888:3888;2181 server.2=zoo2:2888:3888;2181 server.3=0.0.0.0:2888:3888;2181
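
Once the ensemble is up, each node's role can be checked from the host (a sketch, assuming zkServer.sh is on the PATH of the official zookeeper image); one node should report leader and the other two follower:

docker-compose up -d
docker-compose exec zoo1 zkServer.sh status
docker-compose exec zoo2 zkServer.sh status
docker-compose exec zoo3 zkServer.sh status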

Hands-on Spark SQL practice

KMR
1. Log in to KMR.
2. Switch to the spark account: su - spark
3. Enter the spark-shell command line:

spark-shell --master=yarn

4. Common commands:

spark.sql("create external table bhabc(`userid` bigint,`id` int,`date` string,`count` bigint,`opcnt` int,`start` int,`end` int) partitioned by (dt string) row format delimited fields terminated by ','  stored as sequencefile location '/data/behavior/bh_abc_dev'").show
spark.sql("show tables").show
spark.sql("show databases").show
spark.sql("show tables").show
spark.sql("show partitions bhwps").show
spark.sql("alter table bhwps add partition(dt='2019-05-21')").show
spark.sql("select * from bhwps where dt between '2019-05-15' and '2019-05-31' order by `count` desc").show
spark.sql("alter table bhwps add partition(dt='2019-06-22') partition(dt='2019-06-23')").show增加多个分区
spark.sql("msck repair table bhwps").show 修复分区就是重新同步hdfs上的分区信息。
spark.sql("show partitions bhraw").show(100,false) 可以显示超过20个记录。

5. Common problems:
> Directory permission issues
Use hdfs dfs -chown -R <user>:<group> /path to change directory ownership.

> Emptying the HDFS trash
hdfs dfs -expunge
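
For example, assuming the table location from step 4 and a spark user and group (both are assumptions here, not taken from the cluster), the permission fix would look like:

hdfs dfs -ls /data/behavior
hdfs dfs -chown -R spark:spark /data/behavior/bh_abc_dev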

Aliyun private Docker registry mirror acceleration

vim /etc/docker/daemon.json

https://cr.console.aliyun.com/cn-hangzhou/instances/mirrors

{
  "bip": "192.168.55.1/24",
  "registry-mirrors": ["https://2na48vbddcw.mirror.aliyuncs.com"]
}

sudo systemctl daemon-reload
sudo systemctl restart docker
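
Whether the mirror is active can then be confirmed from the daemon info (a quick check; the grep pattern is only illustrative):

docker info | grep -A 1 "Registry Mirrors"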

Common commands for working with SequenceFile in Hive

A SequenceFile is a set of key/value pairs. In practice, when Hive creates a table over one, the key carries no meaning; Hive only splits the data according to the format of the value.
0. Enter the container and connect to Hive

docker-compose -f docker-compose-hive.yml exec hive-server  bash
/opt/hive/bin/beeline -u jdbc:hive2://localhost:10000

1. Create the table

 
create external table sfgz(
     `idx` string,
     `userid` string,
     `flag` string,
     `count` string,
     `value` string,
     `memo` string)
  partitioned by (dt string)
  row format delimited fields terminated by ','
  stored as sequencefile
  location '/user/sfgz';

2. Loading partitions
Method one:
hadoop fs -mkdir -p /user/sfgz/dt=2010-05-06/
hadoop fs -put /tools/mytest.txt.sf /user/sfgz/dt=2019-05-17
hadoop fs -put /tools/mytest.txt.sf /user/sfgz/dt=2010-05-04
Files uploaded this way cannot be recognized by Hive directly; the corresponding partitions must be registered with an alter table ... add partition command before they can be queried.
Method two, which can be queried immediately after loading:
load data local inpath '/tools/mytest.txt.sf' into table sfgz partition(dt='2009-03-01');
load data local inpath '/tools/mytest.gzip.sf' into table sfgz partition(dt='2000-03-02');
3. Check partition info:
show partitions sfgz;
4. Add a partition:
alter table sfgz add partition(dt='2000-03-03');
5. Insert a record:

   insert into sfgz partition(dt='2019-05-16')values('idx3','uid6','5','6','34.7','uid3test2');

6. Counting:
select count(*) from sfgz;   -- not supported on KMR
select count(idx) from sfgz; -- the only form supported on KMR
7. Other common commands:
show databases;
use <database>;
show tables;
select * from sfgz where dt='2000-03-03';
msck repair table sfgz;      -- repair (re-sync) the partitions
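
The same checks can also be run non-interactively from the host by reusing the connection from step 0 (a sketch; beeline's -e flag executes a single statement and exits):

docker-compose -f docker-compose-hive.yml exec hive-server /opt/hive/bin/beeline -u jdbc:hive2://localhost:10000 -e "show partitions sfgz;"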

Trying out docker-hive

1. Clone the image repository https://github.com/big-data-europe/docker-hive.git and install it.
2. Modify its docker-compose.yml to add volume mappings for each container.

version: "3"
 
services:
  namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
    volumes:
      - /data/namenode:/hadoop/dfs/name
      - /data/tools:/tools
    environment:
      - CLUSTER_NAME=test
    env_file:
      - ./hadoop-hive.env
    ports:
      - "50070:50070"
  datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
    volumes:
      - /data/datanode:/hadoop/dfs/data
      - /data/tools:/tools
    env_file:
      - ./hadoop-hive.env
    environment:
      SERVICE_PRECONDITION: "namenode:50070"
    ports:
      - "50075:50075"
  hive-server:
    image: bde2020/hive:2.3.2-postgresql-metastore
    volumes:
      - /data/tools:/tools
    env_file:
      - ./hadoop-hive.env
    environment:
      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://hive-metastore/metastore"
      SERVICE_PRECONDITION: "hive-metastore:9083"
    ports:
      - "10000:10000"
  hive-metastore:
    image: bde2020/hive:2.3.2-postgresql-metastore
    volumes:
      - /data/tools:/tools
    env_file:
      - ./hadoop-hive.env
    command: /opt/hive/bin/hive --service metastore
    environment:
      SERVICE_PRECONDITION: "namenode:50070 datanode:50075 hive-metastore-postgresql:5432"
    ports:
      - "9083:9083"
  hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql:2.3.0
    volumes:
      - /data/tools:/tools
 
  presto-coordinator:
    image: shawnzhu/prestodb:0.181
    volumes:
      - /data/tools:/tools
    ports:
      - "8080:8080"

3. Create a test text file

1,xiaoming,book-TV-code,beijing:chaoyang-shagnhai:pudong
2,lilei,book-code,nanjing:jiangning-taiwan:taibei
3,lihua,music-book,heilongjiang:haerbin
3,lihua,music-book,heilongjiang2:haerbin2
3,lihua,music-book,heilongjiang3:haerbin3

4. Start and connect to the Hive service.

docker-compose up -d
docker-compose exec hive-server bash
/opt/hive/bin/beeline -u jdbc:hive2://localhost:10000


5. Create an external table

create external table t2(
    id      int
   ,name    string
   ,hobby   array<string>
   ,add     map<String,string>
)
row format delimited
fields terminated by ','
collection items terminated by '-'
map keys terminated by ':'
location '/user/t2';


6. Upload files into the directory from the previous step.
Method 1: from the Hive beeline terminal:
load data local inpath '/tools/example.txt' overwrite into table t2;   -- removes all existing files, then writes the new one
load data local inpath '/tools/example.txt' into table t2;             -- adds a new file to the directory (the difference is overwrite)
Method 2: use hadoop fs -put to upload:
hadoop fs -put /tools/example.txt /user/t2        # keeps the original file name
hadoop fs -put /tools/example.txt /user/t2/1.txt  # stored as 1.txt
7. Verify from the Hive command line

select * from t2;   -- run this after each new upload


8. The newly uploaded files can also be browsed in the Hadoop file manager.

Records from the same file are automatically deduplicated.

-------------------------------------------
What about SequenceFile?
1. Inspect the contents of a SequenceFile:
hadoop fs -Dfs.default.name=file:/// -text /tools/mytest.gzip.sf   # fs.default.name is the deprecated key
hadoop fs -Dfs.defaultFS=file:/// -text /tools/mytest.txt.sf

The actual content is:

2. Create the table

  create external table sfgz(
     `idx` string,
     `userid` string,
     `flag` string,
     `count` string,
     `value` string,
     `memo` string)
  partitioned by (dt string)
  row format delimited fields terminated by ','
  stored as sequencefile
  location '/user/sfgz';

3. Upload the files

Method one:
hadoop fs -mkdir -p /user/sfgz/dt=2010-05-06/
hadoop fs -put /tools/mytest.txt.sf /user/sfgz/dt=2019-05-17
hadoop fs -put /tools/mytest.txt.sf /user/sfgz/dt=2010-05-04
This method still requires a manual reload before the partitions become visible, e.g. msck repair table sfgz; (or alter table sfgz add partition(...)).
Method two, which can be queried immediately after loading:
load data local inpath '/tools/mytest.txt.sf' into table sfgz partition(dt='2009-03-01');
load data local inpath '/tools/mytest.gzip.sf' into table sfgz partition(dt='2000-03-02');

GitHub images for Spark/Hive

Big Data Europe
Currently the most reliable reference templates:
https://github.com/big-data-europe/docker-spark
https://github.com/big-data-europe/docker-hive
https://github.com/big-data-europe

Hive documentation
https://cwiki.apache.org/confluence/display/Hive/Home#Home-UserDocumentation

Docker deployment of the wiki (Confluence)

1. Writing the Dockerfile

FROM centos:6.6
 
ENV CONF_INST  /opt/atlassian/
ENV CONF_HOME  /var/atlassian/application-data/
 
 
COPY ./confluence-5.4.4.tar.gz /confluence-5.4.4.tar.gz
COPY ./application-data-init.tar.gz /application-data-init.tar.gz
RUN set -x && yum install -y tar && mkdir -p ${CONF_INST} && tar -xvf /confluence-5.4.4.tar.gz --directory "${CONF_INST}/"
 
COPY ./startup.sh /startup.sh
RUN chmod +x /startup.sh
 
EXPOSE 8090
VOLUME ["${CONF_HOME}", "${CONF_INST}"]
CMD ["/startup.sh"]

2. Writing docker-compose.yml

version: '3.1'
 
services:
  confluence:
    image: wiki:1.0
    restart: always
    ports:
      - 8090:8090
    #entrypoint: bash -c "ping 127.0.0.1"
    #command: bash -c "ping 127.0.0.1"
    #command: /opt/atlassian/confluence/bin/catalina.sh run
    volumes:
      - /data/atlassian/confluence/logs:/opt/atlassian/confluence/logs
      - /data/atlassian/confluence/logs:/opt/atlassian/application-data/confluence/logs
      - /data/atlassian/application-data:/var/atlassian/application-data
      - ./backups:/var/atlassian/application-data/confluence/backups
      - ./restore:/var/atlassian/application-data/confluence/restore:ro
      - /etc/localtime:/etc/localtime:ro
      - /etc/timezone:/etc/timezone:ro
    build:
      context: ./crack
      dockerfile: Dockerfile
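
With the Dockerfile placed under ./crack as the build context, building and starting the wiki would look roughly like this (a sketch; docker-compose tags the built image as wiki:1.0 because of the image field above):

docker-compose build confluence
docker-compose up -d confluence
docker-compose logs -f confluence
# Confluence should then answer on http://<host>:8090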