Skip to content

Commit

Permalink
Nd bias bootstrap (#374)
Browse files Browse the repository at this point in the history
Adds protected tensor maps gathered during test set inference
Adds bootstrapped performance comparison per class of protected tensor maps
  • Loading branch information
ndiamant authored Jul 29, 2020
1 parent c3abae1 commit 18839e7
Show file tree
Hide file tree
Showing 8 changed files with 667 additions and 185 deletions.
9 changes: 6 additions & 3 deletions ml4cvd/arguments.py
Original file line number Diff line number Diff line change
Expand Up @@ -50,11 +50,13 @@ def parse_args():
)

# Tensor Map arguments
parser.add_argument('--input_tensors', default=[], nargs='+')
parser.add_argument('--output_tensors', default=[], nargs='+')
parser.add_argument('--sample_weight', default=None, help='TensorMap key for sample weight in training.')
parser.add_argument('--input_tensors', default=[], nargs='*')
parser.add_argument('--output_tensors', default=[], nargs='*')
parser.add_argument('--protected_tensors', default=[], nargs='*')
parser.add_argument('--sample_weight', default=None, help='TensorMap key for sample weight in training.')
parser.add_argument('--tensor_maps_in', default=[], help='Do not set this directly. Use input_tensors')
parser.add_argument('--tensor_maps_out', default=[], help='Do not set this directly. Use output_tensors')
parser.add_argument('--tensor_maps_protected', default=[], help='Do not set this directly. Use protected_tensors')

# Input and Output files and directories
parser.add_argument(
Expand Down Expand Up @@ -414,6 +416,7 @@ def _process_args(args):
)
args.tensor_maps_out.extend([_get_tmap(ot, needed_tensor_maps) for ot in args.output_tensors])
args.tensor_maps_out = parent_sort(args.tensor_maps_out)
args.tensor_maps_protected = [_get_tmap(it, needed_tensor_maps) for it in args.protected_tensors]

args.bottleneck_type = BOTTLENECK_STR_TO_ENUM[args.bottleneck_type]
if args.bottleneck_type == BottleneckType.NoBottleNeck:
Expand Down
321 changes: 191 additions & 130 deletions ml4cvd/plots.py

Large diffs are not rendered by default.

122 changes: 81 additions & 41 deletions ml4cvd/recipes.py

Large diffs are not rendered by default.

19 changes: 15 additions & 4 deletions ml4cvd/tensor_generators.py
Original file line number Diff line number Diff line change
Expand Up @@ -730,6 +730,7 @@ def get_train_valid_test_paths_split_by_csvs(
def test_train_valid_tensor_generators(
tensor_maps_in: List[TensorMap],
tensor_maps_out: List[TensorMap],
tensor_maps_protected: List[TensorMap],
tensors: str,
batch_size: int,
num_workers: int,
Expand All @@ -751,9 +752,10 @@ def test_train_valid_tensor_generators(
**kwargs
) -> Tuple[TensorGenerator, TensorGenerator, TensorGenerator]:
""" Get 3 tensor generator functions for training, validation and testing data.
:param tensor_maps_in: list of TensorMaps that are input names to a model
:param tensor_maps_out: list of TensorMaps that are output from a model
:param tensor_maps_protected: list of TensorMaps for protected attributes, used to check the model for bias;
these are only added to the outputs of the test set generator
:param tensors: directory containing tensors
:param batch_size: number of examples in each mini-batch
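:param num_workers: number of worker processes. Training and validation share them roughly in proportion to their step counts (at least one each when num_workers is non-zero); the test generator uses all of them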
Expand Down Expand Up @@ -799,9 +801,18 @@ def test_train_valid_tensor_generators(

num_train_workers = int(training_steps / (training_steps + validation_steps) * num_workers) or (1 if num_workers else 0)
num_valid_workers = int(validation_steps / (training_steps + validation_steps) * num_workers) or (1 if num_workers else 0)
generate_train = TensorGenerator(batch_size, tensor_maps_in, tensor_maps_out, train_paths, num_train_workers, cache_size, weights, keep_paths, mixup_alpha, name='train_worker', siamese=siamese, augment=True, sample_weight=sample_weight)
generate_valid = TensorGenerator(batch_size, tensor_maps_in, tensor_maps_out, valid_paths, num_valid_workers, cache_size, weights, keep_paths, name='validation_worker', siamese=siamese, augment=False)
generate_test = TensorGenerator(batch_size, tensor_maps_in, tensor_maps_out, test_paths, num_workers, 0, weights, keep_paths or keep_paths_test, name='test_worker', siamese=siamese, augment=False)
generate_train = TensorGenerator(
batch_size, tensor_maps_in, tensor_maps_out, train_paths, num_train_workers, cache_size, weights,
keep_paths, mixup_alpha, name='train_worker', siamese=siamese, augment=True, sample_weight=sample_weight,
)
generate_valid = TensorGenerator(
batch_size, tensor_maps_in, tensor_maps_out, valid_paths, num_valid_workers, cache_size, weights,
keep_paths, name='validation_worker', siamese=siamese, augment=False,
)
generate_test = TensorGenerator(
batch_size, tensor_maps_in, tensor_maps_out+tensor_maps_protected, test_paths, num_workers, 0, weights,
keep_paths or keep_paths_test, name='test_worker', siamese=siamese, augment=False,
)
return generate_train, generate_valid, generate_test


Expand Down
11 changes: 8 additions & 3 deletions ml4cvd/tensor_maps_by_hand.py
Original file line number Diff line number Diff line change
Expand Up @@ -653,10 +653,15 @@
},
)

TMAPS['genetic_caucasian'] = TensorMap('Genetic-ethnic-grouping_Caucasian_0_0', Interpretation.CATEGORICAL, path_prefix='categorical', channel_map={'no_caucasian': 0, 'caucasian': 1})
TMAPS['genetic_caucasian'] = TensorMap(
'Genetic-ethnic-grouping_Caucasian_0_0', Interpretation.CATEGORICAL,
storage_type=StorageType.CATEGORICAL_FLAG, path_prefix='categorical',
channel_map={'no_caucasian': 0, 'Genetic-ethnic-grouping_Caucasian_0_0': 1},
)
TMAPS['genetic_caucasian_weighted'] = TensorMap(
'Genetic-ethnic-grouping_Caucasian_0_0', Interpretation.CATEGORICAL, path_prefix='categorical',
channel_map={'no_caucasian': 0, 'caucasian': 1}, loss=weighted_crossentropy([10.0, 1.0], 'caucasian_loss'),
'Genetic-ethnic-grouping_Caucasian_0_0', Interpretation.CATEGORICAL, storage_type=StorageType.CATEGORICAL_FLAG,
path_prefix='categorical', channel_map={'no_caucasian': 0, 'Genetic-ethnic-grouping_Caucasian_0_0': 1},
loss=weighted_crossentropy([10.0, 1.0], 'caucasian_loss'),
)

TMAPS['mothers_age'] = TensorMap(
Expand Down
4 changes: 2 additions & 2 deletions ml4cvd/tensor_writer_ukbb.py
Original file line number Diff line number Diff line change
Expand Up @@ -54,7 +54,7 @@
'cine_segmented_sax_b3', 'cine_segmented_sax_b4', 'cine_segmented_sax_b5', 'cine_segmented_sax_b6', 'cine_segmented_sax_b7',
'cine_segmented_sax_b8', 'cine_segmented_sax_b9', 'cine_segmented_sax_b10', 'cine_segmented_sax_b11', 'cine_segmented_sax_b12',
'cine_segmented_sax_b13', 'cine_segmented_sax_inlinevf', 'cine_segmented_lax_inlinevf', 'cine_segmented_ao_dist',
'cine_segmented_lvot', 'flow_250_tp_aov_bh_epat@c_p', 'flow_250_tp_aov_bh_epat@c', 'flow_250_tp_aov_bh_epat@c_mag'
'cine_segmented_lvot', 'flow_250_tp_aov_bh_epat@c_p', 'flow_250_tp_aov_bh_epat@c', 'flow_250_tp_aov_bh_epat@c_mag',
]
MRI_CARDIAC_SERIES_SEGMENTED = [series+'_segmented' for series in MRI_CARDIAC_SERIES]
MRI_BRAIN_SERIES = ['t1_p2_1mm_fov256_sag_ti_880', 't2_flair_sag_p2_1mm_fs_ellip_pf78']
Expand Down Expand Up @@ -503,7 +503,7 @@ def _tensorize_short_and_long_axis_segmented_cardiac_mri(
series_segmented = f'{series}_segmented'
series_zoom = f'{series}_zoom'
series_zoom_segmented = f'{series}_zoom_segmented'

try:
overlay, mask, ventricle_pixels, _ = _get_overlay_from_dicom(slicer)
except KeyError:
Expand Down
4 changes: 2 additions & 2 deletions ml4cvd/test_utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -25,14 +25,14 @@
f'language_1hot_window', shape=(32, 26),
interpretation=Interpretation.LANGUAGE,
channel_map={f'c_{i}': i for i in range(26)},
)
),
]
LANGUAGE_TMAP_1HOT_SOFTMAX = [
TensorMap(
f'language_1hot_out', shape=(26,),
interpretation=Interpretation.LANGUAGE,
channel_map={f'c_{i}': i for i in range(26)},
)
),
]

TMAPS_UP_TO_4D = CONTINUOUS_TMAPS[:-1] + CATEGORICAL_TMAPS[:-1]
Expand Down
Loading

0 comments on commit 18839e7

Please sign in to comment.