utils

check_field_filled(factsheet_dict, factsheet_path, value, empty='')

Check if the field in the factsheet file is filled or not.

Parameters:

factsheet_dict (dict): The factsheet dict. Required.
factsheet_path (list): The factsheet field to check. Required.
value (float): The value to add to the field. Required.
empty (string): Returned if the value could not be appended. Default: ''.

Returns:

float: The value added to the factsheet, or empty if the value could not be appended.

Source code in nebula/addons/trustworthiness/utils.py
def check_field_filled(factsheet_dict, factsheet_path, value, empty=""):
    """
    Check if the field in the factsheet file is filled or not.

    Args:
        factsheet_dict (dict): The factsheet dict.
        factsheet_path (list): The factsheet field to check.
        value (float): The value to add in the field.
        empty (string): If the value could not be appended, the empty string is returned.

    Returns:
        float: The value added to the factsheet, or empty if the value could not be appended.

    """
    if factsheet_dict[factsheet_path[0]][factsheet_path[1]]:
        return factsheet_dict[factsheet_path[0]][factsheet_path[1]]
    elif value != "" and value != "nan":
        if type(value) != str and type(value) != list:
            if math.isnan(value):
                return 0
            else:
                return value
        else:
            return value
    else:
        return empty
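A minimal usage sketch, assuming a hypothetical factsheet layout (the keys below are illustrative, not part of any fixed schema): when the target field is empty, the candidate value is returned so the caller can store it; when it is already filled, the existing value wins.

factsheet = {"participants": {"avg_dataset_size": None}}  # hypothetical keys
# Field is empty and 1500.0 is a valid number, so the candidate value is returned.
assert check_field_filled(factsheet, ["participants", "avg_dataset_size"], 1500.0) == 1500.0
# Field already filled: the existing value is kept and returned instead.
factsheet["participants"]["avg_dataset_size"] = 2000.0
assert check_field_filled(factsheet, ["participants", "avg_dataset_size"], 1500.0) == 2000.0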

count_class_samples(scenario_name, dataloaders_files)

Counts the number of samples by class.

Parameters:

scenario_name (string): Name of the scenario. Required.
dataloaders_files (list): Files that contain the dataloaders. Required.
Source code in nebula/addons/trustworthiness/utils.py
def count_class_samples(scenario_name, dataloaders_files):
    """
    Counts the number of samples by class.

    Args:
        scenario_name (string): Name of the scenario.
        dataloaders_files (list): Files that contain the dataloaders.

    """

    result = {}
    dataloaders = []

    for file in dataloaders_files:
        with open(file, "rb") as f:
            dataloader = pickle.load(f)
            dataloaders.append(dataloader)

    for dataloader in dataloaders:
        for batch, labels in dataloader:
            for b, label in zip(batch, labels, strict=False):
                l = hashids.encode(label.item())
                if l in result:
                    result[l] += 1
                else:
                    result[l] = 1

    name_file = f"{dirname}/files/{scenario_name}/count_class.json"
    with open(name_file, "w") as f:
        json.dump(result, f)
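A hedged usage sketch; the pickle file paths below are an assumption about how the per-participant dataloaders are serialized. The counts are written to {dirname}/files/<scenario_name>/count_class.json, keyed by the hashid-encoded label.

import glob

# Hypothetical paths: one pickled dataloader per participant of the scenario.
dataloader_files = sorted(glob.glob("files/my_scenario/participant_*_train_dataloader.pk"))
count_class_samples("my_scenario", dataloader_files)
# -> files/my_scenario/count_class.json, e.g. {"<hashid_0>": 4890, "<hashid_1>": 5110}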

get_entropy(client_id, scenario_name, dataloader)

Get the entropy of each client in the scenario.

Parameters:

client_id (int): The client id. Required.
scenario_name (string): Name of the scenario. Required.
dataloader: The dataloader containing the client's data. Required.
Source code in nebula/addons/trustworthiness/utils.py
def get_entropy(client_id, scenario_name, dataloader):
    """
    Get the entropy of each client in the scenario.

    Args:
        client_id (int): The client id.
        scenario_name (string): Name of the scenario.
        dataloader: The dataloader containing the client's data.

    """
    result = {}
    client_entropy = {}

    name_file = f"{dirname}/files/{scenario_name}/entropy.json"
    if os.path.exists(name_file):
        with open(name_file) as f:
            client_entropy = json.load(f)

    client_id_hash = hashids.encode(client_id)

    for batch, labels in dataloader:
        for b, label in zip(batch, labels, strict=False):
            l = hashids.encode(label.item())
            if l in result:
                result[l] += 1
            else:
                result[l] = 1

    n = len(dataloader)
    entropy_value = entropy([x / n for x in result.values()], base=2)
    client_entropy[client_id_hash] = entropy_value
    with open(name_file, "w") as f:
        json.dump(client_entropy, f)
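The core of this function is a Shannon entropy (base 2) over the per-class label frequencies, computed with scipy.stats.entropy. A minimal sketch of that step with hypothetical class counts (scipy normalizes the probability vector to sum to 1, so the exact normalization constant does not change the result):

from scipy.stats import entropy

label_counts = {"cat": 50, "dog": 30, "bird": 20}  # hypothetical per-class counts
n = sum(label_counts.values())
h = entropy([c / n for c in label_counts.values()], base=2)
print(h)  # ~1.49 bits; a perfectly balanced 3-class split would give log2(3) ≈ 1.58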

get_input_value(input_docs, inputs, operation)

Gets the input value from the input document and applies the metric operation to the value.

Parameters:

input_docs (map): The input document map. Required.
inputs (list): All the inputs. Required.
operation (string): The metric operation. Required.

Returns:

float: The metric value.

Source code in nebula/addons/trustworthiness/utils.py
def get_input_value(input_docs, inputs, operation):
    """
    Gets the input value from the input document and applies the metric operation to the value.

    Args:
        input_docs (map): The input document map.
        inputs (list): All the inputs.
        operation (string): The metric operation.

    Returns:
        float: The metric value

    """

    input_value = None
    args = []
    for i in inputs:
        source = i.get("source", "")
        field = i.get("field_path", "")
        input_doc = input_docs.get(source, None)
        if input_doc is None:
            logger.warning(f"{source} is null")
        else:
            input = get_value_from_path(input_doc, field)
            args.append(input)
    try:
        operationFn = getattr(calculation, operation)
        input_value = operationFn(*args)
    except TypeError:
        logger.warning(f"{operation} is not valid")

    return input_value
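A hedged sketch of the expected input shapes; the document names, field paths, and the operation name are illustrative assumptions, and the operation must match the name of a function defined in the calculation module.

input_docs = {  # hypothetical documents
    "factsheet": {"performance": {"accuracy": 0.91}},
    "metrics": {"fairness": {"selection_rate": 0.87}},
}
inputs = [  # each entry names a source document and a /-separated field path
    {"source": "factsheet", "field_path": "performance/accuracy"},
    {"source": "metrics", "field_path": "fairness/selection_rate"},
]
value = get_input_value(input_docs, inputs, "some_operation")  # "some_operation" is hypothetical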

get_value_from_path(input_doc, path)

Gets the input value from the input document by path.

Parameters:

input_doc (map): The input document map. Required.
path (string): The /-separated path to the input value of interest. Required.

Returns:

float: The input value from the input document, or None if the path cannot be resolved.

Source code in nebula/addons/trustworthiness/utils.py
def get_value_from_path(input_doc, path):
    """
    Gets the input value from the input document by path.

    Args:
        input_doc (map): The input document map.
        path (string): The /-separated path to the input value of interest.

    Returns:
        float: The input value from the input document

    """

    d = input_doc
    for nested_key in path.split("/"):
        temp = d.get(nested_key)
        if isinstance(temp, dict):
            d = d.get(nested_key)
        else:
            return temp
    return None
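The path is a /-separated sequence of keys into nested dicts, for example (hypothetical document):

doc = {"performance": {"test": {"accuracy": 0.91}}}
get_value_from_path(doc, "performance/test/accuracy")  # -> 0.91
get_value_from_path(doc, "performance/missing")        # -> None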

read_csv(filename)

Read a CSV file.

Parameters:

filename (string): Name of the file. Required.

Returns:

object: The CSV content read with pandas, or None if the file does not exist.

Source code in nebula/addons/trustworthiness/utils.py
def read_csv(filename):
    """
    Read a CSV file.

    Args:
        filename (string): Name of the file.

    Returns:
        object: The CSV content read with pandas, or None if the file does not exist.

    """
    if exists(filename):
        return pd.read_csv(filename)
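Usage sketch with a hypothetical path; when the file does not exist the function implicitly returns None, so callers should check the result before using it.

df = read_csv("files/my_scenario/data_distribution.csv")  # hypothetical path
if df is not None:
    print(df.head())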

write_results_json(out_file, dict)

Writes the result to JSON.

Parameters:

out_file (string): The output file. Required.
dict (dict): The object to be written into JSON. Required.

Returns:

None: The results are written to the output file; nothing is returned.

Source code in nebula/addons/trustworthiness/utils.py
def write_results_json(out_file, dict):
    """
    Writes the result to JSON.

    Args:
        out_file (string): The output file.
        dict (dict): The object to be written into JSON.

    Returns:
        None: The results are written to the output file; nothing is returned.

    """

    with open(out_file, "a") as f:
        json.dump(dict, f, indent=4)
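A small usage sketch with hypothetical content. Note that the output file is opened in append mode ("a"), so repeated calls add further JSON objects to the same file rather than overwriting it.

results = {"trust_score": 0.82, "pillar": "robustness"}  # hypothetical content
write_results_json("files/my_scenario/nebula_trust_results.json", results)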