Skip to content

MeshPredictor#

mesh_predictor.MeshPredictor #

Bases: Predictor

Regression method to predict 3D projections from process parameters.

Derives from Predictor, where more useful methods are defined.

Source code in mesh_predictor/Regressor3D.py
  9
 10
 11
 12
 13
 14
 15
 16
 17
 18
 19
 20
 21
 22
 23
 24
 25
 26
 27
 28
 29
 30
 31
 32
 33
 34
 35
 36
 37
 38
 39
 40
 41
 42
 43
 44
 45
 46
 47
 48
 49
 50
 51
 52
 53
 54
 55
 56
 57
 58
 59
 60
 61
 62
 63
 64
 65
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
class MeshPredictor(Predictor):
    """
    Regression method to predict 3D projections from process parameters.

    Derives from Predictor, where more useful methods are defined.
    """

    def load_data(self, doe, data, process_parameters, position, output, categorical=None, index='doe_id', validation_split=0.1, validation_method="random", position_scaler='normal'):
        """
        Loads pandas Dataframes containing the data and preprocesses it.

        :param doe: pandas.Dataframe object containing the process parameters (design of experiments table).
        :param data: pandas.Dataframe object containing the experiments.
        :param process_parameters: list of process parameters to be used. The names must match the columns of the csv file.
        :param categorical: list of process parameters that should be considered as categorical and one-hot encoded.
        :param position: position variables as a list of three names. The names must match columns of the csv file.
        :param output: output variable(s) to be predicted. The name must match one column of the csv file.
        :param index: name of the column in doe and data representing the design ID (default: 'doe_id')
        :param validation_split: percentage of the data used for validation (default: 0.1)
        :param validation_method: method to split the data for validation, either 'random' or 'leaveoneout' (default: 'random')
        :param position_scaler: normalization applied to the position attributes ('minmax' or 'normal', default 'normal')
        """

        self.has_config = True
        self.data_loaded = True

        # Attribute names
        self.process_parameters = process_parameters

        # A 3D mesh requires exactly three position coordinates (x, y, z).
        self.position_attributes = position
        if len(self.position_attributes) != 3:
            print("Error: the position attribute must have three dimensions.")
            sys.exit()

        # Accept a single output name as a convenience and normalize to a list.
        if isinstance(output, list):
            self.output_attributes = output
        else:
            self.output_attributes = [output]

        # Copy into a fresh list; a literal [] default would be shared across calls.
        self.categorical_attributes = list(categorical) if categorical is not None else []
        self.angle_input = False
        self.position_scaler = position_scaler

        self.doe_id = index
        self.validation_split = validation_split
        self.validation_method = validation_method

        # Process parameters
        self._preprocess_parameters(doe)

        # Expand the process parameters in the main df
        self._preprocess_variables(data)

        # Get numpy arrays
        self._make_arrays()


    def predict(self, process_parameters, positions, as_df=False):
        """
        Predicts the output variables for each node specified in coordinates.

        ```python
        reg.predict(process_parameters={...}, positions=...)
        ```

        :param process_parameters: dictionary containing the value of all process parameters.
        :param positions: (N, 3) array-like containing the xyz coordinates of each node that should be predicted, in the order of the position attributes passed to `load_data()`.
        :param as_df: whether the prediction should be returned as numpy arrays (False, default) or pandas dataframe (True).
        """

        if not self.has_config:
            print("Error: The data has not been loaded yet.")
            return

        if self.model is None:
            print("Error: no model has been trained yet.")
            return

        # Accept lists/tuples as well as numpy arrays.
        positions = np.asarray(positions)

        nb_points, _ = positions.shape

        X = np.empty((nb_points, 0))

        # Process parameters first, in the order used during training.
        for attr in self.process_parameters:

            if attr in self.categorical_attributes:

                # One-hot encode the categorical value and repeat it for every node.
                code = one_hot([process_parameters[attr]], self.categorical_values[attr])
                code = np.repeat(code, nb_points, axis=0)

                X = np.concatenate((X, code), axis=1)

            else:

                # Standardize the scalar value and broadcast it to every node.
                val = ((process_parameters[attr] - self.mean_values[attr]) / self.std_values[attr]) * np.ones((nb_points, 1))

                X = np.concatenate((X, val), axis=1)

        # Position attributes are last
        for i, attr in enumerate(self.position_attributes):

            if self.position_scaler == 'normal':
                values = (positions[:, i] - self.mean_values[attr]) / self.std_values[attr]
            else:
                values = (positions[:, i] - self.min_values[attr]) / (self.max_values[attr] - self.min_values[attr])

            X = np.concatenate((X, values.reshape((nb_points, 1))), axis=1)

        # Predict in normalized space...
        y = self.model.predict(X, batch_size=self.batch_size)

        # ...and map each output back to its original range.
        for idx, attr in enumerate(self.output_attributes):
            y[:, idx] = self._rescale_output(attr, y[:, idx])

        # Return inputs and outputs
        if as_df:
            d = pd.DataFrame()
            for i, attr in enumerate(self.position_attributes):
                d[attr] = positions[:, i]
            for i, attr in enumerate(self.output_attributes):
                d[attr] = y[:, i]
            return d

        else:
            return positions, y


    def _compare(self, doe_id):
        """
        Plots ground truth and prediction side by side for one experiment.

        :param doe_id: identifier of the experiment in the design of experiments.
        :return: (positions, t, y) with de-normalized positions (stacked as (3, N)), ground truth and predictions, or None on error.
        """

        if self.model is None:
            print("Error: no model has been trained yet.")
            return

        if doe_id not in self.doe_ids:
            print("The experiment", doe_id, 'is not in the dataset.')
            return

        # Select the rows of this experiment.
        X = self.X[self.doe_id_list == doe_id, :]
        t = self.target[self.doe_id_list == doe_id, :]
        N, _ = t.shape

        # De-normalize the ground truth in place (boolean indexing above
        # returned a copy, so self.target is not modified).
        for idx, attr in enumerate(self.output_attributes):
            t[:, idx] = self._rescale_output(attr, t[:, idx])

        # De-normalize the positions: they occupy the last three columns of X
        # (i - 3 addresses them from the end for i = 0, 1, 2).
        positions = []
        for i, attr in enumerate(self.position_attributes):
            if self.position_scaler == 'normal':
                values = self.mean_values[attr] + X[:, i-3] * self.std_values[attr]
            else:
                values = self.min_values[attr] + X[:, i-3] * (self.max_values[attr] - self.min_values[attr])
            positions.append(values)
        # NOTE(review): stacked as (3, N), i.e. transposed w.r.t. the (N, 3)
        # input of predict() — confirm callers expect this orientation.
        positions = np.array(positions)

        # Predict in normalized space and map the outputs back.
        y = self.model.predict(X, batch_size=self.batch_size)

        for idx, attr in enumerate(self.output_attributes):
            y[:, idx] = self._rescale_output(attr, y[:, idx])

        # One pair of 3D scatter plots per output (ground truth / prediction),
        # both sharing the color scale of the ground truth for comparability.
        for idx, attr in enumerate(self.output_attributes):

            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            p = ax.scatter(X[:, -3], X[:, -2], X[:, -1], c=t[:, idx],
                cmap='seismic', vmin=t[:, idx].min(), vmax=t[:, idx].max())
            fig.colorbar(p, ax=ax)
            ax.set_title("Ground truth - " + attr)

            fig = plt.figure()
            ax = fig.add_subplot(111, projection='3d')
            p = ax.scatter(X[:, -3], X[:, -2], X[:, -1], c=y[:, idx],
                cmap='seismic', vmin=t[:, idx].min(), vmax=t[:, idx].max())
            fig.colorbar(p, ax=ax)
            ax.set_title("Prediction - " + attr)

        return positions, t, y

load_data(doe, data, process_parameters, position, output, categorical=[], index='doe_id', validation_split=0.1, validation_method='random', position_scaler='normal') #

Loads pandas Dataframes containing the data and preprocesses it.

Parameters:

Name Type Description Default
doe

pandas.Dataframe object containing the process parameters (design of experiments table).

required
data

pandas.Dataframe object containing the experiments.

required
process_parameters

list of process parameters to be used. The names must match the columns of the csv file.

required
categorical

list of process parameters that should be considered as categorical and one-hot encoded.

[]
position

position variables as a list. The name must match one column of the csv file.

required
output

output variable(s) to be predicted. The name must match one column of the csv file.

required
index

name of the column in doe and data representing the design ID (default: 'doe_id')

'doe_id'
validation_split

percentage of the data used for validation (default: 0.1)

0.1
validation_method

method to split the data for validation, either 'random' or 'leaveoneout' (default: 'random')

'random'
position_scaler

normalization applied to the position attributes ('minmax' or 'normal', default 'normal')

'normal'
Source code in mesh_predictor/Regressor3D.py
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
def load_data(self, doe, data, process_parameters, position, output, categorical=None, index='doe_id', validation_split=0.1, validation_method="random", position_scaler='normal'):
    """
    Loads pandas Dataframes containing the data and preprocesses it.

    :param doe: pandas.Dataframe object containing the process parameters (design of experiments table).
    :param data: pandas.Dataframe object containing the experiments.
    :param process_parameters: list of process parameters to be used. The names must match the columns of the csv file.
    :param categorical: list of process parameters that should be considered as categorical and one-hot encoded.
    :param position: position variables as a list of three names. The names must match columns of the csv file.
    :param output: output variable(s) to be predicted. The name must match one column of the csv file.
    :param index: name of the column in doe and data representing the design ID (default: 'doe_id')
    :param validation_split: percentage of the data used for validation (default: 0.1)
    :param validation_method: method to split the data for validation, either 'random' or 'leaveoneout' (default: 'random')
    :param position_scaler: normalization applied to the position attributes ('minmax' or 'normal', default 'normal')
    """

    self.has_config = True
    self.data_loaded = True

    # Attribute names
    self.process_parameters = process_parameters

    # A 3D mesh requires exactly three position coordinates (x, y, z).
    self.position_attributes = position
    if len(self.position_attributes) != 3:
        print("Error: the position attribute must have three dimensions.")
        sys.exit()

    # Accept a single output name as a convenience and normalize to a list.
    if isinstance(output, list):
        self.output_attributes = output
    else:
        self.output_attributes = [output]

    # Copy into a fresh list; a literal [] default would be shared across calls.
    self.categorical_attributes = list(categorical) if categorical is not None else []
    self.angle_input = False
    self.position_scaler = position_scaler

    self.doe_id = index
    self.validation_split = validation_split
    self.validation_method = validation_method

    # Process parameters
    self._preprocess_parameters(doe)

    # Expand the process parameters in the main df
    self._preprocess_variables(data)

    # Get numpy arrays
    self._make_arrays()
predict(process_parameters, positions, as_df=False) #

Predicts the output variables for each node specified in coordinates.

reg.predict(process_parameters={...}, positions=...)

Parameters:

Name Type Description Default
process_parameters

dictionary containing the value of all process parameters.

required
positions

(N, 3) numpy array containing the xyz coordinates of each node that should be predicted. The columns must follow the order of the position attributes given to load_data().

required
as_df

whether the prediction should be returned as numpy arrays (False, default) or pandas dataframe (True).

False
Source code in mesh_predictor/Regressor3D.py
 66
 67
 68
 69
 70
 71
 72
 73
 74
 75
 76
 77
 78
 79
 80
 81
 82
 83
 84
 85
 86
 87
 88
 89
 90
 91
 92
 93
 94
 95
 96
 97
 98
 99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
def predict(self, process_parameters, positions, as_df=False):
    """
    Predicts the output variables for each node specified in coordinates.

    ```python
    reg.predict(process_parameters={...}, positions=...)
    ```

    :param process_parameters: dictionary containing the value of all process parameters.
    :param positions: (N, 3) array-like containing the xyz coordinates of each node that should be predicted, in the order of the position attributes passed to `load_data()`.
    :param as_df: whether the prediction should be returned as numpy arrays (False, default) or pandas dataframe (True).
    """

    if not self.has_config:
        print("Error: The data has not been loaded yet.")
        return

    if self.model is None:
        print("Error: no model has been trained yet.")
        return

    # Accept lists/tuples as well as numpy arrays.
    positions = np.asarray(positions)

    nb_points, _ = positions.shape

    X = np.empty((nb_points, 0))

    # Process parameters first, in the order used during training.
    for attr in self.process_parameters:

        if attr in self.categorical_attributes:

            # One-hot encode the categorical value and repeat it for every node.
            code = one_hot([process_parameters[attr]], self.categorical_values[attr])
            code = np.repeat(code, nb_points, axis=0)

            X = np.concatenate((X, code), axis=1)

        else:

            # Standardize the scalar value and broadcast it to every node.
            val = ((process_parameters[attr] - self.mean_values[attr]) / self.std_values[attr]) * np.ones((nb_points, 1))

            X = np.concatenate((X, val), axis=1)

    # Position attributes are last
    for i, attr in enumerate(self.position_attributes):

        if self.position_scaler == 'normal':
            values = (positions[:, i] - self.mean_values[attr]) / self.std_values[attr]
        else:
            values = (positions[:, i] - self.min_values[attr]) / (self.max_values[attr] - self.min_values[attr])

        X = np.concatenate((X, values.reshape((nb_points, 1))), axis=1)

    # Predict in normalized space...
    y = self.model.predict(X, batch_size=self.batch_size)

    # ...and map each output back to its original range.
    for idx, attr in enumerate(self.output_attributes):
        y[:, idx] = self._rescale_output(attr, y[:, idx])

    # Return inputs and outputs
    if as_df:
        d = pd.DataFrame()
        for i, attr in enumerate(self.position_attributes):
            d[attr] = positions[:, i]
        for i, attr in enumerate(self.output_attributes):
            d[attr] = y[:, i]
        return d

    else:
        return positions, y