Recently I set up two open-source distributed file systems for distributed storage on Linux: GlusterFS and Ceph.
Preface: you can search GitHub and read the source code or the official documentation to learn more about GlusterFS and Ceph; I won't walk through the underlying principles and abstract technical background here. Instead, this post covers the full deployment process and the problems I ran into along the way. If you work in this area and have mature production-grade solutions or suggestions, please share them here so we can all improve together; interested readers can also add me on WeChat (please note your request and where you found this) to compare notes.
Here is the detailed GlusterFS walkthrough.
My hardware configuration:
CPU: 4 cores, 8 GB RAM, CentOS 7.1 (kernel 3.10.0, 64-bit), GlusterFS 3.10.2. Each server has 3 disks attached: one 500 MB disk for the system and two 40 GB disks for data storage. The data is spread across 2 servers with every file stored in 2 copies, so 4 servers are needed in total; this tolerates the loss of one server without losing data.
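Before installing anything, each node's data disk needs a filesystem and a brick directory. A minimal sketch, assuming the data disk shows up as /dev/vdb (the device name is an assumption; /app/glusterFS is the brick path used throughout the transcripts below):
mkfs.xfs -i size=512 /dev/vdb                          # XFS with 512-byte inodes, the commonly recommended brick filesystem
mkdir -p /app && mount /dev/vdb /app                   # mount the data disk
echo '/dev/vdb /app xfs defaults 0 0' >> /etc/fstab    # persist the mount across reboots
mkdir -p /app/glusterFS                                # brick directory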
Install the yum repository: yum -y install centos-release-gluster
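centos-release-gluster tracks the CentOS Storage SIG's current GlusterFS release line. To stay on the 3.10 series used in this article, the SIG also publishes versioned release packages (the exact package name is an assumption based on the SIG's naming scheme):
yum -y install centos-release-gluster310   # pin the repository to the GlusterFS 3.10 stream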
Install the server:
[root@vm-10-58-57-104 glusterFS]# yum install glusterfs-server
--Install the remaining required packages
[root@vm-10-58-57-105 /]# yum install -y glusterfs glusterfs-fuse xfsprogs
---Configure the service to start at boot
[root@vm-10-58-57-105 /]# cd /bin
[root@vm-10-58-57-105 bin]# systemctl enable glusterd.service
[root@vm-10-58-57-105 bin]# systemctl start glusterd
[root@vm-10-58-57-105 bin]# ps -ef | grep gluster
root       9460     1  0 13:34 ?        00:00:00 /usr/sbin/glusterd -p /var/run/glusterd.pid --log-level INFO
root       9476 22277  0 13:34 pts/0    00:00:00 grep --color=auto gluster
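To confirm the installed version matches the 3.10.2 noted earlier (a quick check, not part of the original session):
gluster --version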
--Add the cluster nodes (peer probe)
[root@vm-10-58-57-104 glusterFS]# gluster peer probe 10.58.57.104
peer probe: success. Probe on localhost not needed
[root@vm-10-58-57-104 glusterFS]# gluster peer probe 10.58.57
peer probe: failed: Probe returned with Transport endpoint is not connected
(The probe above failed simply because the address was mistyped: 10.58.57 is not a complete IP.)
[root@vm-10-58-57-104 glusterFS]# gluster peer probe 10.58.57.105
peer probe: success.
[root@vm-10-58-57-104 glusterFS]# gluster peer probe 10.58.57.100
peer probe: success.
[root@vm-10-58-57-104 glusterFS]# gluster peer status
Number of Peers: 2
Hostname: 10.58.57.105
Uuid: e9b2a1da-c50e-45e1-8d1c-02bfee4b0920
State: Peer in Cluster (Connected)
Hostname: 10.58.57.100
Uuid: 2a067d8f-b1b0-43f7-b532-0c1a29a0c60c
State: Peer in Cluster (Connected)
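If a probe to a reachable host fails with "Transport endpoint is not connected", check that glusterd is running on the peer and that the firewall allows Gluster traffic through. A sketch for stock CentOS 7 firewalld (the brick port range follows the GlusterFS defaults; widen it if you run more bricks per node):
firewall-cmd --permanent --add-port=24007-24008/tcp   # glusterd management
firewall-cmd --permanent --add-port=49152-49251/tcp   # brick ports
firewall-cmd --reload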
--Create a striped-replicated volume
With stripe 2 replica 2 over four bricks, consecutive bricks in the list form a replica pair (104/102 mirror each other, as do 105/100), and files are striped across the two pairs; the "Number of Bricks: 1 x 2 x 2 = 4" line below reads distribute x stripe x replica. (Note: 10.58.57.102 must also have been probed into the pool before this step, though that probe isn't shown above. Also, striped volumes were deprecated in later GlusterFS releases, so for new deployments consider a plain distributed-replicated volume or sharding instead.)
[root@vm-10-58-57-104 glusterFS]# gluster volume create sr22 stripe 2 replica 2 transport tcp 10.58.57.104:/app/glusterFS/ 10.58.57.102:/app/glusterFS/ 10.58.57.105:/app/glusterFS/ 10.58.57.100:/app/glusterFS/
volume create: sr22: success: please start the volume to access data
[root@vm-10-58-57-104 glusterFS]# gluster volume info
Volume Name: sr22
Type: Striped-Replicate
Volume ID: 9095bcf1-256a-4c6c-aa16-1f6d2e664ed7
Status: Created
Snapshot Count: 0
Number of Bricks: 1 x 2 x 2 = 4
Transport-type: tcp
Bricks:
Brick1: 10.58.57.104:/app/glusterFS
Brick2: 10.58.57.102:/app/glusterFS
Brick3: 10.58.57.105:/app/glusterFS
Brick4: 10.58.57.100:/app/glusterFS
Options Reconfigured:
transport.address-family: inet
nfs.disable: on
[root@vm-10-58-57-104 glusterFS]# gluster volume status
Volume sr22 is not started
[root@vm-10-58-57-104 glusterFS]# gluster volume start sr22
volume start: sr22: success
[root@vm-10-58-57-104 glusterFS]# gluster volume status
Status of volume: sr22
Gluster process                             TCP Port  RDMA Port  Online  Pid
------------------------------------------------------------------------------
Brick 10.58.57.104:/app/glusterFS           49152     0          Y       26251
Brick 10.58.57.102:/app/glusterFS           49152     0          Y       23797
Brick 10.58.57.105:/app/glusterFS           49152     0          Y       19907
Brick 10.58.57.100:/app/glusterFS           49152     0          Y       6879
Self-heal Daemon on localhost               N/A       N/A        Y       26271
Self-heal Daemon on 10.58.57.100            N/A       N/A        Y       6899
Self-heal Daemon on 10.58.57.105            N/A       N/A        Y       19927
Self-heal Daemon on 10.58.57.102            N/A       N/A        Y       23817
Task Status of Volume sr22
------------------------------------------------------------------------------
There are no active volume tasks
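With the volume started, the self-heal daemons listed above can be queried to confirm the replica pairs are in sync (a quick check, not in the original session):
gluster volume heal sr22 info   # empty output per brick means no entries are pending heal
For reference, tearing a volume down later is done with "gluster volume stop sr22" followed by "gluster volume delete sr22" (destructive: brick directories must be cleaned up by hand afterwards).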
----Mount the volume from a GlusterFS client
[root@vm-10-58-57-104 app]# mkdir gs-client
[root@vm-10-58-57-104 app]# cd /bin
[root@vm-10-58-57-104 bin]# mount -t glusterfs 10.58.57.104:/sr22 /app/gs-client/
[root@vm-10-58-57-104 bin]# cd /app/gs-client
[root@vm-10-58-57-104 gs-client]# df -h;
Filesystem               Size  Used Avail Use% Mounted on
/dev/mapper/centos-root   32G  1.9G   30G   6% /
devtmpfs                 3.9G     0  3.9G   0% /dev
tmpfs                    3.9G     0  3.9G   0% /dev/shm
tmpfs                    3.9G  385M  3.6G  10% /run
tmpfs                    3.9G     0  3.9G   0% /sys/fs/cgroup
/dev/mapper/centos-app    40G  7.3G   33G  19% /app
/dev/vda1                509M  120M  389M  24% /boot
tmpfs                    799M     0  799M   0% /run/user/2014
tmpfs                    799M     0  799M   0% /run/user/0
10.58.57.104:/sr22        80G   14G   67G  17% /app/gs-client
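Two optional follow-ups that are not part of the original session. To make the client mount survive reboots, add it to /etc/fstab:
echo '10.58.57.104:/sr22 /app/gs-client glusterfs defaults,_netdev 0 0' >> /etc/fstab
And as a quick sanity check of the striped-replicated layout, write a file through the mount and look for its pieces on the bricks (file name and size are arbitrary):
dd if=/dev/zero of=/app/gs-client/testfile bs=1M count=4   # on the client
ls -l /app/glusterFS/                                      # on each server: each replica pair holds identical stripe files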
---Check volume quotas and per-brick I/O statistics
[root@vm-10-58-57-104 app]# gluster volume quota sr22 list
quota command failed : Quota is disabled, please enable quota
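As the message says, quota must be enabled per volume before limits can be listed. A minimal sketch (the 10GB limit on the volume root is an arbitrary example):
gluster volume quota sr22 enable
gluster volume quota sr22 limit-usage / 10GB
gluster volume quota sr22 list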
[root@vm-10-58-57-104 app]# gluster volume profile sr22 start
Starting volume profile on sr22 has been successful
[root@vm-10-58-57-104 app]# gluster volume profile sr22 info
Brick: 10.58.57.104:/app/glusterFS
----------------------------------
Cumulative Stats:
 %-latency  Avg-latency  Min-Latency  Max-Latency  No. of calls         Fop
 ---------  -----------  -----------  -----------  ------------        ----
      0.00      0.00 us      0.00 us      0.00 us            24  RELEASEDIR
Duration: 4158 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Interval 0 Stats:
 %-latency  Avg-latency  Min-Latency  Max-Latency  No. of calls         Fop
 ---------  -----------  -----------  -----------  ------------        ----
      0.00      0.00 us      0.00 us      0.00 us            24  RELEASEDIR
Duration: 4158 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Brick: 10.58.57.100:/app/glusterFS
----------------------------------
Cumulative Stats:
 %-latency  Avg-latency  Min-Latency  Max-Latency  No. of calls         Fop
 ---------  -----------  -----------  -----------  ------------        ----
      0.00      0.00 us      0.00 us      0.00 us            24  RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Interval 0 Stats:
 %-latency  Avg-latency  Min-Latency  Max-Latency  No. of calls         Fop
 ---------  -----------  -----------  -----------  ------------        ----
      0.00      0.00 us      0.00 us      0.00 us            24  RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Brick: 10.58.57.102:/app/glusterFS
----------------------------------
Cumulative Stats:
 %-latency  Avg-latency  Min-Latency  Max-Latency  No. of calls         Fop
 ---------  -----------  -----------  -----------  ------------        ----
      0.00      0.00 us      0.00 us      0.00 us            24  RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Interval 0 Stats:
 %-latency  Avg-latency  Min-Latency  Max-Latency  No. of calls         Fop
 ---------  -----------  -----------  -----------  ------------        ----
      0.00      0.00 us      0.00 us      0.00 us            24  RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Brick: 10.58.57.105:/app/glusterFS
----------------------------------
Cumulative Stats:
 %-latency  Avg-latency  Min-Latency  Max-Latency  No. of calls         Fop
 ---------  -----------  -----------  -----------  ------------        ----
      0.00      0.00 us      0.00 us      0.00 us            24  RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
Interval 0 Stats:
 %-latency  Avg-latency  Min-Latency  Max-Latency  No. of calls         Fop
 ---------  -----------  -----------  -----------  ------------        ----
      0.00      0.00 us      0.00 us      0.00 us            24  RELEASEDIR
Duration: 4157 seconds
Data Read: 0 bytes
Data Written: 0 bytes
[root@vm-10-58-57-104 app]# gluster volume profile sr22 stop
Stopping volume profile on sr22 has been successful
[root@vm-10-58-57-104 app]# gluster volume profile sr22 info
Profile on Volume sr22 is not started
[root@vm-10-58-57-104 app]#
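Besides profile, the top subcommand surfaces the busiest files and operation counts per brick; for example (the list-cnt value is arbitrary):
gluster volume top sr22 read brick 10.58.57.104:/app/glusterFS list-cnt 10   # ten most-read files on this brick
gluster volume top sr22 open list-cnt 10                                     # most-opened files across the volume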
-------Next, benchmark the volume with iozone----------
-----Install the iozone benchmark
iozone is a mature open-source file system benchmarking tool; for usage and background, see:
http://www.iozone.org/
http://www.iozone.org/docs/IOzone_msword_98.pdf
Install it from source as follows. Download the latest stable release from:
http://www.iozone.org/src/current/
[root@vm-10-58-57-104 tools]# rpm -ivh iozone-3-465.src.rpm
Updating / installing...
1:iozone-3-465? ? ? ? ? ? ? ? ? ? ################################# [100%]
warning: user capps does not exist - using root
warning: group capps does not exist - using root
[root@vm-10-58-57-104 app]# cd ~/rpmbuild/SOURCES
[root@vm-10-58-57-104 SOURCES]# tar -xvf iozone3_465.tar
Finally, cd into the src/current directory inside the extracted tree:
[root@vm-10-58-57-104 current]# less? makefile
[root@vm-10-58-57-104 current]# make lib
libasync.c? libbif.c
[root@vm-10-58-57-104 current]# make linux-AMD64
Building iozone for Linux-AMD64
cc -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -DNAME='"linux-AMD64"' \
   -D__AMD64__ -DSHARED_MEM -Dlinux -D_LARGEFILE64_SOURCE \
   -DHAVE_PREAD  iozone.c -o iozone_linux-AMD64.o
cc -c -O3 -Dunix -DHAVE_ANSIC_C -DASYNC_IO -D_LARGEFILE64_SOURCE \
   -DSHARED_MEM -Dlinux  libbif.c -o libbif.o
cc -c -O3 -Dunix -Dlinux -DHAVE_ANSIC_C -DASYNC_IO \
   -D_LARGEFILE64_SOURCE  libasync.c -o libasync.o
Building fileop for Linux-AMD64
cc -Wall -c -O3  fileop.c -o fileop_linux-AMD64.o
Building the pit_server
cc -c  pit_server.c -o pit_server.o
cc  -O3  iozone_linux-AMD64.o libbif.o libasync.o \
   -lrt -lpthread -o iozone
cc  -O3 -Dlinux fileop_linux-AMD64.o -o fileop
cc  -O3 -Dlinux pit_server.o -o pit_server
[root@vm-10-58-57-104 current]# ./fileop -h
     --------------------------------------
     |              Fileop                |
     |        $Revision: 1.61 $           |
     |                                    |
     |                by                  |
     |                                    |
     |            Don Capps               |
     --------------------------------------
     fileop [-f X ]|[-l # -u #] [-s Y] [-e] [-b] [-w] [-d] [-t] [-v] [-h]
     -f #      Force factor. X^3 files will be created and removed.
     -l #      Lower limit on the value of the Force factor.
     -u #      Upper limit on the value of the Force factor.
     -s #      Optional. Sets filesize for the create/write. May use suffix 'K' or 'M'.
     -e        Excel importable format.
     -b        Output best case results.
     -i #      Increment force factor by this increment.
     -w        Output worst case results.
     -d        Specify starting directory.
     -U        Mount point to remount between tests.
     -t        Verbose output option.
     -v        Version information.
     -h        Help text.
The structure of the file tree is:
X number of Level 1 directories, with X number of
level 2 directories, with X number of files in each
of the level 2 directories.
Example:  fileop 2

              dir_1                          dir_2
             /     \                        /     \
        sdir_1      sdir_2            sdir_1      sdir_2
        /    \      /    \            /    \      /    \
   file_1 file_2 file_1 file_2   file_1 file_2 file_1 file_2
Each file will be created, and then Y bytes is written to the file.
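A small metadata benchmark against the Gluster mount might look like this (the force factor and file size are illustrative; fileop creates X^3 files, so keep X modest on networked storage, and the binary path assumes the source tree extracted above):
cd /app/gs-client
~/rpmbuild/SOURCES/iozone3_465/src/current/fileop -f 3 -s 4k -e   # 27 files of 4 kB each, Excel-importable output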
[root@vm-10-58-57-104 current]# ./pit_server -h
Usage: pit_server [-v] -p service
[root@vm-10-58-57-104 current]# ./iozone -h
iozone: help mode
Usage: iozone [-s filesize_kB] [-r record_size_kB] [-f [path]filename] [-h]
[-i test] [-E] [-p] [-a] [-A] [-z] [-Z] [-m] [-M] [-t children]
[-l min_number_procs] [-u max_number_procs] [-v] [-R] [-x] [-o]
[-d microseconds] [-F path1 path2...] [-V pattern] [-j stride]
[-T] [-C] [-B] [-D] [-G] [-I] [-H depth] [-k depth] [-U mount_point]
[-S cache_size] [-O] [-L cacheline_size] [-K] [-g maxfilesize_kB]
[-n minfilesize_kB] [-N] [-Q] [-P start_cpu] [-e] [-c] [-b Excel.xls]
[-J milliseconds] [-X write_telemetry_filename] [-w] [-W]
[-Y read_telemetry_filename] [-y minrecsize_kB] [-q maxrecsize_kB]
[-+u] [-+m cluster_filename] [-+d] [-+x multiplier] [-+p # ]
[-+r] [-+t] [-+X] [-+Z] [-+w percent dedupable] [-+y percent_interior_dedup]
[-+C percent_dedup_within]
-a  Auto mode
-A  Auto2 mode
-b Filename  Create Excel worksheet file
-B  Use mmap() files
-c  Include close in the timing calculations
-C  Show bytes transferred by each child in throughput testing
-d #  Microsecond delay out of barrier
-D  Use msync(MS_ASYNC) on mmap files
-e  Include flush (fsync,fflush) in the timing calculations
-E  Run extension tests
-f filename  to use
-F filenames  for each process/thread in throughput test
-g #  Set maximum file size (in kBytes) for auto mode (or #m or #g)
-G  Use msync(MS_SYNC) on mmap files
-h  help
-H #  Use POSIX async I/O with # async operations
-i #  Test to run (0=write/rewrite, 1=read/re-read, 2=random-read/write
      3=Read-backwards, 4=Re-write-record, 5=stride-read, 6=fwrite/re-fwrite
      7=fread/Re-fread, 8=random_mix, 9=pwrite/Re-pwrite, 10=pread/Re-pread
      11=pwritev/Re-pwritev, 12=preadv/Re-preadv)
-I  Use VxFS VX_DIRECT, O_DIRECT,or O_DIRECTIO for all file operations
-j #  Set stride of file accesses to (# * record size)
-J #  milliseconds of compute cycle before each I/O operation
-k #  Use POSIX async I/O (no bcopy) with # async operations
-K  Create jitter in the access pattern for readers
-l #  Lower limit on number of processes to run
-L #  Set processor cache line size to value (in bytes)
-m  Use multiple buffers
-M  Report uname -a output
-n #  Set minimum file size (in kBytes) for auto mode (or #m or #g)
-N  Report results in microseconds per operation
-o  Writes are synch (O_SYNC)
-O  Give results in ops/sec.
-p  Purge on
-P #  Bind processes/threads to processors, starting with this cpu
-q #  Set maximum record size (in kBytes) for auto mode (or #m or #g)
-Q  Create offset/latency files
-r #  record size in Kb
   or -r #k .. size in kB
   or -r #m .. size in MB
   or -r #g .. size in GB
-R  Generate Excel report
-s #  file size in Kb
   or -s #k .. size in kB
   or -s #m .. size in MB
   or -s #g .. size in GB
-S #  Set processor cache size to value (in kBytes)
-t #  Number of threads or processes to use in throughput test
-T  Use POSIX pthreads for throughput tests
-u #  Upper limit on number of processes to run
-U  Mount point to remount between tests
-v  version information
-V #  Verify data pattern write/read
-w  Do not unlink temporary file
-W  Lock file when reading or writing
-x  Turn off stone-walling
-X filename  Write telemetry file. Contains lines with (offset reclen compute_time) in ascii
-y #  Set minimum record size (in kBytes) for auto mode (or #m or #g)
-Y filename  Read telemetry file. Contains lines with (offset reclen compute_time) in ascii
-z  Used in conjunction with -a to test all possible record sizes
-Z  Enable mixing of mmap I/O and file I/O
-+b #,#  burst size (KB),sleep between burst (mili-second)
-+E Use existing non-Iozone file for read-only testing
-+F Truncate file before write in thread_mix_test
-+J Include think time (-j #) in throughput calculation
-+K Sony special. Manual control of test 8.
-+m Cluster_filename  Enable Cluster testing
-+d File I/O diagnostic mode. (To troubleshoot a broken file I/O subsystem)
-+u Enable CPU utilization output (Experimental)
-+x # Multiplier to use for incrementing file and record sizes
-+p # Percentage of mix to be reads
-+r Enable O_RSYNC|O_SYNC for all testing.
-+t Enable network performance test. Requires -+m
-+n No retests selected.
-+k Use constant aggregate data set size.
-+q Delay in seconds between tests.
-+l Enable record locking mode.
-+L Enable record locking mode, with shared file.
-+B Sequential mixed workload.
-+D Enable O_DSYNC mode.
-+A #  Enable madvise. 0 = normal, 1=random, 2=sequential
       3=dontneed, 4=willneed
-+N Do not truncate existing files on sequential writes.
-+S # Dedup-able data is limited to sharing within each numerically
      identified file set.
-+W # Add this value to the child thread ID, so that additional files
      can be added while maintaining the proper dedupability with previously
      existing files that are within the same seed group (-+S).
-+V Enable shared file. No locking.
-+X Enable short circuit mode for filesystem testing ONLY
    ALL Results are NOT valid in this mode.
-+Z Enable old data set compatibility mode. WARNING.. Published
    hacks may invalidate these results and generate bogus, high
    values for results.
-+w ## Percent of dedup-able data in buffers.
-+y ## Percent of dedup-able within & across files in buffers.
-+C ## Percent of dedup-able within & not across files in buffers.
-+H Hostname  Hostname of the PIT server.
-+P Service  Service of the PIT server.
-+z Enable latency histogram logging.
--------------------------------------------------------------------------------------------------------------------------------
iozone is a flexible performance benchmarking tool; there is plenty of material online on how to structure the tests, or feel free to contact me to compare notes.
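A typical first run against the Gluster mount might look like this (all parameters are illustrative; make file size times process count comfortably exceed client RAM, otherwise you are benchmarking the page cache rather than GlusterFS):
cd /app/gs-client
~/rpmbuild/SOURCES/iozone3_465/src/current/iozone -i 0 -i 1 -r 128k -s 4g -t 4 -e -w -b /tmp/gluster-iozone.xls
Here -i 0 -i 1 runs the write/rewrite and read/re-read tests, -t 4 uses four concurrent processes (each writing a 4 GB file), -e includes fsync in the timings (important on a networked filesystem), -w keeps the test files for inspection, and -b writes an Excel-importable summary.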