Public/User_Guide/FTI: config.fti

File config.fti, 5.1 KB (added by Kai Keller, 5 years ago)

FTI default configuration file

Line 
1##############   FTI CONFIGURATION FILE   ###############
2
3# *****************************************************************
4# *** Here are the main parameters you should provide to FTI ******
5# *****************************************************************
6[Basic]
7
8# Set to 1 if you want to dedicate 1 MPI rank per node to FTI.
9# Set to 0 if you want ALL ckpt. post-processing to be done inline.
10Head = 0
11
12# The number of processes launched per node (Same for every node)
13# including FTI-dedicated process.
14Node_size = 2
15
16# LOCAL directory where the local checkpoints will be stored
17# This directory MUST exist and have write access
18Ckpt_dir = ./Local      #/path/to/local/storage/
19
20# GLOBAL directory where the global checkpoints will be stored
21# This directory MUST exist and have write access
22Glbl_dir = ./Global     #/path/to/global/storage/
23
24# GLOBAL directory where the FTI metadata will be stored
25# This directory MUST exist and have write access
26Meta_dir = ./Meta       #/home/username/.fti
27
28# Interval in minutes between Level 1 (L1) checkpoints (local write)
29Ckpt_L1 = 3
30
31# Interval in minutes between Level 2 (L2) checkpoints (partner copy)
32Ckpt_L2 = 5
33
34# Interval in minutes between Level 3 (L3) checkpoints (Reed-Solomon)
35Ckpt_L3 = 7
36
37# Interval in minutes between Level 4 (L4) checkpoints (PFS write)
38Ckpt_L4 = 11
39
40# Interval in minutes between differential checkpoints (dCP) at level 4
41# dCP - differential checkpointing
42# This setting requires ckpt_io = 3 (FTI-FF) and Enable_dCP = 1
43Dcp_L4 = 0
44
45# 1 if Level 2 ckpt is inline (synchronous) 0 if not (asynchronous)
46Inline_L2 = 1
47
48# 1 if Level 3 ckpt is inline (synchronous) 0 if not (asynchronous)
49Inline_L3 = 1
50
51# 1 if Level 4 ckpt is inline (synchronous) 0 if not (asynchronous)
52Inline_L4 = 1
53
54# Set to 1 if you want to save the last checkpoint taken before finalize
55# Set to 0 if you want to erase all checkpoints after finalize
56keep_last_ckpt = 0
57
58# If enabled, all level 4 checkpoints of the execution will be kept in 'Glbl_dir/l4_archive'
59keep_l4_ckpt   = 0
60
61# The size of the encoding groups (Something between 4 and 16)
62# The total number of nodes MUST be multiple of this parameter
63Group_size = 4
64
65# Number of iterations between iteration length sync (0 => 512 iterations)
66# If your app has iterations of varying length, set this value between 1 and 10
67max_sync_intv               = 0
68
69# Checkpoint I/O mode. Set to:
70# 1 -> POSIX
71# 2 -> MPI-IO
72# 3 -> FTI-FF
73# 4 -> SIONLib
74# 5 -> HDF5.
75ckpt_io                     = 1
76
77# Enable staging feature
78Enable_Staging              = 0
79
80# Enable differential checkpointing (dCP)
81Enable_dCP                  = 0
82
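83# Select dCP hashing algorithm:
84# 1 -> MD5
85# 2 -> CRC32
86# The mode may also be set by the environment variable 'FTI_DCP_HASH_MODE=[0|1]'
87# This will override the setting from the configuration file! NOTE(review): the values 1/2 listed above do not match the range [0|1] or the setting 0 below -- confirm which values are valid.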
88dCP_Mode                    = 0
89
90# Set hash-partition block size
91# The partition block size, b, must be: 512 < b < USHRT_MAX (Bytes)
92# b may also be set by the environment variable 'FTI_DCP_BLOCK_SIZE=b (in bytes)'
93# This will override the setting from the configuration file!
94dCP_Block_Size              = 16384
95
96# The verbosity of FTI. (2 is recommended)
97# 3 (Print only errors, silent mode)
98# 2 (Print errors and a few important messages)
99# 1 (Print debug messages, very verbose)
100Verbosity = 2
101
102# *****************************************************************
103# *** Change these parameters ONLY in case of restart   ***********
104# *****************************************************************
105
106[Restart]
107
108# Set this to 0 if you are launching this job for the first time
109# Set this to 1 if you are recovering this job after a failure
110Failure = 0
111
112# Set with the execution ID in case of restart after failure
113# Set to NULL if normal execution
114Exec_ID = XXXX-XX-XX_XX-XX-XX
115
116
117# *****************************************************************
118# *** Change these parameters to inject failures.       ***********
119# *****************************************************************
120
121[Injection]
122
123# Rank of the process that injects the failures
124rank = 0
125
126# Total number of bit-flips to inject
127number = 0
128
129# Bit position of the injection
130position = 0
131
132# Injection frequency in seconds
133frequency = 0
134
135
136# *****************************************************************
137# *** Change something here ONLY if you know what you are doing ***
138# *****************************************************************
139[Advanced]
140
141# The ckpt files are decomposed in blocks of size Block_size KB
142Block_size = 1024
143
144# The ckpt files are transferred in chunks of size Transfer_size MB
145# from local to PFS
146Transfer_size = 16
147
148# The tags for MPI communications done within the FTI library
149general_tag = 2612
150ckpt_tag = 711   
151stage_tag = 406
152final_tag = 3107
153
154# Set to 1 if you are running a local test on a single computer
155Local_test = 1
156
157# This option only has an effect if -DENABLE_LUSTRE was passed to the CMake command.
158# It sets the striping unit for the MPI-IO file.
159lustre_striping_unit        = 4194304
160
161# This option only has an effect if -DENABLE_LUSTRE was passed to the CMake command.
162# It sets the striping factor for the MPI-IO file.
163lustre_striping_factor      = -1
164
165# This option only has an effect if -DENABLE_LUSTRE was passed to the CMake command.
166# It sets the striping offset for the MPI-IO file.
167lustre_striping_offset      = -1
168
169