
# ProjectionPredictor

## mesh_predictor.ProjectionPredictor

Bases: Predictor

Regression method to predict 2D projections from process parameters.

Derives from Predictor, which defines additional shared methods.
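Below is a minimal usage sketch. The CSV file names and the column names (`pressure`, `temperature`, `u`, `v`, `deviation`) are illustrative assumptions, not part of the documented API, and the no-argument constructor is also an assumption; model training is not covered on this page (see the Predictor base class).

```python
import pandas as pd
from mesh_predictor import ProjectionPredictor

# Hypothetical input files: one row per experiment in the DoE table,
# one row per measured position in the data table.
doe = pd.read_csv("doe.csv")
data = pd.read_csv("data.csv")

reg = ProjectionPredictor()  # assumption: no constructor arguments required

reg.load_data(
    doe=doe,
    data=data,
    process_parameters=["pressure", "temperature"],  # hypothetical columns
    position=["u", "v"],                             # exactly two position attributes
    output="deviation",                              # hypothetical output column
    validation_split=0.1,
)
```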

Source code in mesh_predictor/Regressor2D.py

````python
class ProjectionPredictor(Predictor):
    """
    Regression method to predict 2D projections from process parameters.

    Derives from Predictor, which defines additional shared methods.
    """

    def load_data(self, doe, data, process_parameters, position, output, categorical=[], index='doe_id', validation_split=0.1, validation_method="random", position_scaler='normal'):
        """
        Loads pandas DataFrames containing the data and preprocesses them.

        :param doe: pandas.DataFrame object containing the process parameters (design of experiments table).
        :param data: pandas.DataFrame object containing the experiments.
        :param process_parameters: list of process parameters to be used. The names must match the columns of the data file.
        :param categorical: list of process parameters that should be considered as categorical and one-hot encoded.
        :param position: position variables as a list of two names. The names must match the columns of the data file.
        :param output: output variable(s) to be predicted. The names must match the columns of the data file.
        :param index: name of the column in doe and data representing the design ID (default: 'doe_id').
        :param validation_split: proportion of the data used for validation (default: 0.1).
        :param validation_method: method used to split the data for validation, either 'random' or 'leaveoneout' (default: 'random').
        :param position_scaler: normalization applied to the position attributes, either 'minmax' or 'normal' (default: 'normal').
        """

        self.has_config = True
        self.data_loaded = True

        # Attributes names
        self.process_parameters = process_parameters

        self.position_attributes = position
        if len(self.position_attributes) != 2:
            print("Error: the position attribute must have two dimensions.")
            sys.exit()

        if isinstance(output, list): 
            self.output_attributes = output
        else:
            self.output_attributes = [output]

        self.categorical_attributes = categorical
        self.angle_input = False
        self.position_scaler = position_scaler

        self.doe_id = index
        self.validation_split = validation_split
        self.validation_method = validation_method

        # Process parameters
        self._preprocess_parameters(doe)

        # Expand the process parameters in the main df
        self._preprocess_variables(data)

        # Get numpy arrays
        self._make_arrays()


    def predict(self, process_parameters, positions, as_df=False):
        """
        Predicts the output variable(s) at the given positions: either on a regular grid covering the min/max range of each position attribute seen during training (when positions is a tuple), or at explicit positions passed as a (N, 2) numpy array.

        ```python
        x, y = reg.predict(process_parameters={...}, positions=(100, 100))
        ```

        or:

        ```python
        x, y = reg.predict(
            process_parameters={...}, 
            positions=pd.DataFrame(
                {
                    "u": np.linspace(0., 1. , 100), 
                    "v": np.linspace(0., 1. , 100)
                }
            ).to_numpy()
        )
        ```

        :param process_parameters: dictionary containing the value of all process parameters.
        :param positions: tuple of dimensions to be used for the prediction or (N, 2) numpy array of positions.
        :param as_df: whether the prediction should be returned as numpy arrays (False, default) or pandas dataframe (True).
        """

        if not self.has_config:
            print("Error: The data has not been loaded yet.")
            return

        if self.model is None:
            print("Error: no model has been trained yet.")
            return

        # Generate inputs
        if isinstance(positions, tuple):
            shape = positions
            nb_points = shape[0] * shape[1]

            x = np.linspace(self.min_values[self.position_attributes[0]], self.max_values[self.position_attributes[0]], shape[0])
            y = np.linspace(self.min_values[self.position_attributes[1]], self.max_values[self.position_attributes[1]], shape[1])

            samples = np.array([[i, j] for i in x for j in y])

        elif isinstance(positions, np.ndarray):

            nb_points, d = positions.shape
            shape = (nb_points, 1)
            if d != 2:
                print("Error: the positions must have the shape (N, 2).")
                return
            samples = positions

        else:
            print("Error: positions must be either a tuple or a (N, 2) numpy array.")
            return


        # Process parameters
        X = np.empty((nb_points, 0))
        for idx, attr in enumerate(self.process_parameters):

            if attr in self.categorical_attributes:

                code = one_hot([process_parameters[attr]], self.categorical_values[attr])
                code = np.repeat(code, nb_points, axis=0)

                X = np.concatenate((X, code), axis=1)

            else:

                val = ((process_parameters[attr] - self.mean_values[attr] ) / self.std_values[attr]) * np.ones((nb_points, 1))

                X = np.concatenate((X, val ), axis=1)

        # Position attributes are last
        for i, attr in enumerate(self.position_attributes):
            if self.position_scaler == 'normal':
                values = (samples[:, i] - self.mean_values[attr] ) / self.std_values[attr]
            else:
                values = (samples[:, i] - self.min_values[attr] ) / (self.max_values[attr] - self.min_values[attr])

            X = np.concatenate((X, values.reshape((nb_points, 1))), axis=1)

        # Predict outputs and de-normalize
        y = self.model.predict(X, batch_size=self.batch_size).reshape((nb_points, len(self.output_attributes)))
        result = []
        for idx, attr in enumerate(self.output_attributes):
            result.append(self._rescale_output(attr, y[:, idx]).reshape(shape))

        # Return inputs and outputs
        if as_df:
            d = pd.DataFrame()
            for i, attr in enumerate(self.position_attributes):
                d[attr] = samples[:, i]
            for i, attr in enumerate(self.output_attributes):
                # Flatten: the outputs were reshaped to the grid shape above
                d[attr] = result[i].flatten()
            return d

        else:
            return samples, np.array(result)


    def _compare(self, doe_id):

        if self.model is None:
            print("Error: no model has been trained yet.")
            return

        if doe_id not in self.doe_ids:
            print("The experiment", doe_id, "is not in the dataset.")
            return

        X = self.X[self.doe_id_list == doe_id, :]
        t = self.target[self.doe_id_list == doe_id, :]
        N, _ = t.shape
        for idx, attr in enumerate(self.output_attributes):
            t[:, idx] = self._rescale_output(attr, t[:, idx])

        positions = []
        # The position attributes occupy the last two columns of X, so index i-2 maps to columns -2 and -1
        for i, attr in enumerate(self.position_attributes):
            if self.position_scaler == 'normal':
                values = self.mean_values[attr] + X[:, i-2] * self.std_values[attr]
            else:
                values = self.min_values[attr] + X[:, i-2] * (self.max_values[attr] - self.min_values[attr])
            positions.append(values)
        positions = np.array(positions)

        y = self.model.predict(X, batch_size=self.batch_size)


        for idx, attr in enumerate(self.output_attributes):
            y[:, idx] = self._rescale_output(attr, y[:, idx])

        import matplotlib.tri as tri
        triang = tri.Triangulation(X[:, -2], X[:, -1])

        for idx, attr in enumerate(self.output_attributes):
            plt.figure()
            plt.subplot(121)
            plt.tripcolor(triang, t[:, idx], shading='flat', vmin=t[:, idx].min(), vmax=t[:, idx].max())
            plt.colorbar()
            plt.title("Ground truth - " + attr)
            plt.subplot(122)
            plt.tripcolor(triang, y[:, idx], shading='flat', vmin=t[:, idx].min(), vmax=t[:, idx].max())
            plt.colorbar()
            plt.title("Prediction - " + attr)
            plt.tight_layout()

        return positions, t, y


    def compare_xyz(self, doe_id):
        """
        Creates a 3D point cloud comparing the ground truth and the prediction for a given experiment.

        Works only when the output variables are [x, y, z] coordinates.

        :param doe_id: id of the experiment.
        """

        if self.model is None:
            print("Error: no model has been trained yet.")
            return

        if doe_id not in self.doe_ids:
            print("The experiment", doe_id, "is not in the dataset.")
            return        

        X = self.X[self.doe_id_list == doe_id, :]
        t = self.target[self.doe_id_list == doe_id, :]
        N, _ = t.shape

        for idx, attr in enumerate(self.output_attributes):
            t[:, idx] = self._rescale_output(attr, t[:, idx])

        positions = []
        # The position attributes occupy the last two columns of X, so index i-2 maps to columns -2 and -1
        for i, attr in enumerate(self.position_attributes):
            if self.position_scaler == 'normal':
                values = self.mean_values[attr] + X[:, i-2] * self.std_values[attr]
            else:
                values = self.min_values[attr] + X[:, i-2] * (self.max_values[attr] - self.min_values[attr])
            positions.append(values)
        positions = np.array(positions)

        y = self.model.predict(X, batch_size=self.batch_size)

        for idx, attr in enumerate(self.output_attributes):
            y[:, idx] = self._rescale_output(attr, y[:, idx])


        fig = plt.figure()
        ax = fig.add_subplot(121, projection='3d')
        ax.set_title("Ground truth")
        p = ax.scatter(
            t[:, 0],  
            t[:, 1],
            t[:, 2],
            s=0.005
        )
        ax = fig.add_subplot(122, projection='3d')
        ax.set_title("Prediction")
        p = ax.scatter(
            y[:, 0],  
            y[:, 1],
            y[:, 2],
            s=0.005
        )

        return positions, t, y
````

### load_data(doe, data, process_parameters, position, output, categorical=[], index='doe_id', validation_split=0.1, validation_method='random', position_scaler='normal')

Loads pandas DataFrames containing the data and preprocesses them.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `doe` | pandas.DataFrame object containing the process parameters (design of experiments table). | *required* |
| `data` | pandas.DataFrame object containing the experiments. | *required* |
| `process_parameters` | List of process parameters to be used. The names must match the columns of the data file. | *required* |
| `categorical` | List of process parameters that should be considered as categorical and one-hot encoded. | `[]` |
| `position` | Position variables as a list of two names. The names must match the columns of the data file. | *required* |
| `output` | Output variable(s) to be predicted. The names must match the columns of the data file. | *required* |
| `index` | Name of the column in doe and data representing the design ID. | `'doe_id'` |
| `validation_split` | Proportion of the data used for validation. | `0.1` |
| `validation_method` | Method used to split the data for validation, either `'random'` or `'leaveoneout'`. | `'random'` |
| `position_scaler` | Normalization applied to the position attributes, either `'minmax'` or `'normal'`. | `'normal'` |
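A sketch of the optional arguments (the column names are again hypothetical): categorical parameters are one-hot encoded instead of standardized, `'leaveoneout'` presumably holds out whole experiments for validation, and `position_scaler='minmax'` rescales the positions to [0, 1] over their training range instead of standardizing them.

```python
reg.load_data(
    doe=doe,
    data=data,
    process_parameters=["force", "material"],  # hypothetical columns
    categorical=["material"],                  # 'material' is one-hot encoded
    position=["u", "v"],
    output=["x", "y", "z"],                    # several outputs can be predicted at once
    validation_method="leaveoneout",           # leave-one-out validation split
    position_scaler="minmax",                  # scale positions to [0, 1]
)
```
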
### predict(process_parameters, positions, as_df=False)

Predicts the output variable(s) at the given positions: either on a regular grid covering the min/max range of each position attribute seen during training (when positions is a tuple), or at explicit positions passed as a (N, 2) numpy array.

```python
x, y = reg.predict(process_parameters={...}, positions=(100, 100))
```

or:

```python
x, y = reg.predict(
    process_parameters={...},
    positions=pd.DataFrame(
        {
            "u": np.linspace(0., 1., 100),
            "v": np.linspace(0., 1., 100)
        }
    ).to_numpy()
)
```

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `process_parameters` | Dictionary containing the value of all process parameters. | *required* |
| `positions` | Tuple of grid dimensions used for the prediction, or a (N, 2) numpy array of positions. | *required* |
| `as_df` | Whether the prediction should be returned as numpy arrays (`False`, default) or as a pandas dataframe (`True`). | `False` |
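With `as_df=True`, the positions and the de-normalized outputs come back in a single dataframe with one row per predicted point, which is convenient for inspection or export. A sketch, where the process parameter names and values are assumptions:

```python
# Predict on a regular 50x50 grid spanning the training range of the positions.
df = reg.predict(
    process_parameters={"pressure": 10.0, "temperature": 200.0},  # hypothetical
    positions=(50, 50),
    as_df=True,
)
print(df.head())  # columns: the two position attributes, then the output attributes
```
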

### compare_xyz(doe_id)

Creates a 3D point cloud comparing the ground truth and the prediction for a given experiment.

Works only when the output variables are [x, y, z] coordinates.

Parameters:

| Name | Description | Default |
| --- | --- | --- |
| `doe_id` | Id of the experiment. | *required* |
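A short usage sketch, assuming an experiment with id 3 exists in the DoE table. The method builds the matplotlib figure but does not call `plt.show()` itself, and it returns the positions, the rescaled ground truth and the prediction, so the arrays can be reused for custom plots or error metrics:

```python
import matplotlib.pyplot as plt

positions, t, y = reg.compare_xyz(doe_id=3)  # hypothetical experiment id
plt.show()

# Both point clouds have shape (N, 3): ground truth vs. prediction.
print(t.shape, y.shape)
```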