Skip to content

analysis

Analysis

Tools for analysing GPR predictions. Author: Audun Skau Hansen

choose_n_most_distant(all_x, n)

choose n measurements such that the total distance between the measurements is at a maximum Author: Audun

Source code in btjenesten/analysis.py
183
184
185
186
187
188
189
190
191
192
def choose_n_most_distant(all_x, n):
    """
    choose n measurements such that the total distance between the measurements is at a maximum
    Author: Audun
    """
    d = np.sum((all_x[:, None] - all_x[None, :])**2, axis = 2)

    total_d = np.sum(d, axis = 0)

    return np.argsort(total_d)[-n:]

data_projection(regressor, axes=[0], resolution=20, center=None)

Project high-dimensional regressor predictions onto smaller spaces.

Author: Audun Skau Hansen

Arguments

regressor = a gpr regressor axies = indices of axis to sweep over resolution = resolution of sweeps along all axes center = (optional) set center point of sweep (default is middle of the region)

Examples

x, y = data_projection(regressor, axes = [0]) -> all axes except 0 are kept fixed at the mean values (or center values), while 0 is allowed to vary inside the fitting region. plot(x[0], y)

x, y = data_projection(regressor, axes = [1,2]) -> all axes except 1 and 2 are fixed to mean values (or center values) while 1 and 2 are allowed to map a surface in the fitting region. contourf(x[0], x[1], y)

Source code in btjenesten/analysis.py
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
def data_projection(regressor, axes = [0], resolution = 20, center = None):
    """
    Project high-dimensional regressor predictions onto smaller 
    spaces.

    Author: Audun Skau Hansen

    Arguments
    ===

    regressor  = a gpr regressor 
    axies      = indices of axis to sweep over
    resolution = resolution of sweeps along all axes
    center     = (optional) set center point of sweep (default is middle of the region)

    Examples
    ===

    x, y = data_projection(regressor, axes = [0]) -> 
    all axes except 0 are kept fixed at the mean values (or center values), 
    while 0 is allowed to vary inside the fitting region.
    plot(x[0], y) 

    x, y = data_projection(regressor, axes = [1,2]) -> 
    all axes except 1 and 2 are fixed to mean values (or center values)
    while 1 and 2 are allowed to map a surface in the fitting region.
    contourf(x[0], x[1], y)

    """


    # extract fitting regions (only internal datapoints will be predicted)
    mean  = np.mean(regressor.recover(regressor.training_data_X), axis =0 )
    if center is not None:
        mean = np.array(center)

    #print(regressor.recover(regressor.training_data_X))
    bound = np.max(regressor.recover(regressor.training_data_X), axis = 0)-np.min(regressor.recover(regressor.training_data_X), axis = 0)
    #print(mean, bound)
    lower_bound = mean - bound
    upper_bound = mean + bound


    # create a grid wherein the datapoints are interpolated
    grid = []
    for i in range(len(lower_bound)):
        grid.append(np.linspace(-bound[i], bound[i], resolution))

    #if center is None:
    #    center = np.zeros(len(mean), dtype = float)


    mgrid = list(mean) #[0 for i in range(len(center))]
    for i in range(len(axes)):
        mgrid[axes[i]] = grid[axes[i]]  + mean[axes[i]]



    prediction_grid = np.array(np.meshgrid(*mgrid)).reshape(len(mean), -1)

    # return prediction to user
    x = [] # list to contain the relevant grid-points    
    for i in range(len(axes)):
        x.append(mgrid[axes[i]])

    return x, regressor.predict(prediction_grid.T).reshape([resolution for i in range(len(axes))])

html_table(values, columns=None, rows=None)

Simple HTML table generator Author: Audun Skau Hansen

Source code in btjenesten/analysis.py
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
def html_table(values, columns = None, rows = None):
    """
    Simple HTML table generator
    Author: Audun Skau Hansen
    """
    ret = """<table>\n"""

    nr = values.shape[0]
    nc = values.shape[1]

    if columns is not None:
        ret += "<tr>\n"
        ret += "<th></th>\n"

        for c in range(len(columns)):
            ret += "<th>%s</th>\n" % columns[c]

        ret += "</tr>\n"

    for r in range(nr):
        ret += "<tr>\n"
        if columns is not None:
            if rows is not None:
                ret += "<th>%s</th>\n" % rows[r]
            else:
                ret += "<th></th>\n"
        else:
            if rows is not None:
                ret += "<th>%s</th>\n" % rows[r]
            else:
                ret += "<th></th>\n"

        for c in range(nc):
            ret += "<th>%s</th>\n" % values[r, c]
        ret += "</tr>\n"
    ret += """</table>\n"""

    return HTML(ret)

one_dimensional_plots(x_data, proposed_new_x, reg, y_labels, x_labels)

Author: Ayla S. Coder

Source code in btjenesten/analysis.py
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
def one_dimensional_plots(x_data, proposed_new_x, reg, y_labels, x_labels):
    """
    Author: Ayla S. Coder
    """
    import matplotlib.pyplot as plt
    # Where y_labels and x_labels are arrays of strings 

    plt.figure(figsize=(8, 5)) # Sets the figure size

    rows = int(len(x_data[0])/3) # The amount of rows of plots in the figure

    dimensions = len(x_data[0]) # The amount of dimensions evaluated.

    # For loop to set up each subplot
    for i in range (dimensions):

        # This function is documented on Btjeneste website. If you're curious, run the command help(bt.analysis.data_projection)
        x,y = data_projection(reg, axes = [i], center = proposed_new_x, resolution = 100)

        # Sets up the i-th subplot
        ax = plt.subplot(rows, dimensions, i+1)

        ax.set_ylabel(y_labels[i], fontsize = 15)
        ax.set_xlabel(x_labels[i], fontsize = 15)
        ax.grid()
       # What is plotted in each plot:
        ax.plot(x[0], y)

    plt.show()

parameter_optimization(x_data, y_data, training_fraction=0.8, normalize_y=True, params=None, training_subset=None)

Draft for a parameter optimization scheme Author: Audun Skau Hansen

Takes as input the dataset (x_data, y_data) and the fraction (a float in the interval 0.0 - 1.0) of datapoints to use as training data.

Source code in btjenesten/analysis.py
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
def parameter_optimization(x_data, y_data, training_fraction = 0.8, normalize_y = True, params = None, training_subset = None):
    """
    Draft for a parameter optimization scheme
    Author: Audun Skau Hansen

    Takes as input the dataset (x_data, y_data) and the 
    fraction (a float in the interval 0.0 - 1.0) of datapoints
    to use as training data.
    """
    n = int(training_fraction*x_data.shape[0])

    # special first iteration
    if params is None:
        params = np.ones(x_data.shape[1])*-2.0 #*0.001
    #training_subset = np.random.choice(x_data.shape[0], n, replace = False)
    if training_subset is None:
        training_subset = np.ones(x_data.shape[0], dtype = bool)
        training_subset[n:] = False
    else:
        if len(training_subset)<len(y_data):
            # assume index element array
            ts = np.zeros(len(y_data), dtype = bool)
            ts[training_subset] = True
            training_subset = ts


    #print(training_subset)
    #print(x_data[training_subset])

    #print(training_subset)
    y_data_n = y_data*1
    if normalize_y:
        y_data_n*=y_data_n.max()**-1

    def residual(params, x_data = x_data, y_data=y_data_n, training_subset = training_subset):
        test_subset = np.ones(x_data.shape[0], dtype = bool)
        test_subset[training_subset] = False
        regressor = Regressor(x_data[training_subset] , y_data[training_subset]) 
        regressor.params = 10**params
        energy = np.sum((regressor.predict(x_data[test_subset]) - y_data[test_subset])**2)
        return energy


    ret = minimize(residual, params)
    #print(ret)
    return 10**ret["x"]

parameter_tuner_3d(all_x, all_y, n)

Interactive (widget for Jupyter environments) parameter tuner for the gpr module

Authors: Audun Skau Hansen and Ayla S. Coder

Source code in btjenesten/analysis.py
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
def parameter_tuner_3d(all_x, all_y, n):
    """
    Interactive (widget for Jupyter environments) parameter tuner 
    for the gpr module 

    Authors: Audun Skau Hansen and Ayla S. Coder 
    """


    import matplotlib.pyplot as plt
    from matplotlib.widgets import Slider, Button



    training_x = all_x[n]
    training_y = all_y[n]

    regressor = Regressor(training_x, training_y)


    # The parametrized function to be plotted
    def f(params1, params2, params3):
        regressor.params = 10**np.array([params1, params2, params3])
        return regressor.predict(all_x)



    # Create the figure and the line that we will manipulate
    fig, ax = plt.subplots()
    plt.plot(np.arange(len(all_y))[n], all_y[n], "o", markersize = 10, label = "training data", color = (0,0,.5))
    plt.plot(all_y, "o", label = "true values", color = (.9,.2,.4))
    plt.legend()
    line, = plt.plot( f(1,1,1), ".-", lw=1, color = (.9,.9,.2))

    ax.set_xlabel('Time [s]')

    # adjust the main plot to make room for the sliders
    plt.subplots_adjust(left=0.4, bottom=0.25)
    """
    # Make a horizontal slider to control the frequency.
    axfreq = plt.axes([0.25, 0.1, 0.65, 0.03])
    freq_slider = Slider(
        ax=axfreq,
        label='Frequency [Hz]',
        valmin=0.1,
        valmax=30,
        valinit=init_frequency,
    )
    """

    # Make a vertically oriented slider to control the amplitude
    param1 = plt.axes([0.1, 0.3, 0.02, 0.5])
    param_slider1 = Slider(
        ax=param1,
        label="log(P1)",
        valmin=-10,
        valmax=10,
        valinit=1.0,
        orientation="vertical"
    )

    # Make a vertically oriented slider to control the amplitude
    param2 = plt.axes([0.2, 0.3, 0.02, 0.5])
    param_slider2 = Slider(
        ax=param2,
        label="log(P2)",
        valmin=-10,
        valmax=10,
        valinit=1.0,
        orientation="vertical"
    )

    # Make a vertically oriented slider to control the amplitude
    param3 = plt.axes([0.3, 0.3, 0.02, 0.5])
    param_slider3 = Slider(
        ax=param3,
        label="log(P3)",
        valmin=-10,
        valmax=10,
        valinit=1.0,
        orientation="vertical"
    )



    # The function to be called anytime a slider's value changes
    def update(val):
        line.set_ydata( f(param_slider1.val,param_slider2.val,param_slider3.val)) 
        fig.canvas.draw_idle()



    # register the update function with each slider
    #freq_slider.on_changed(update)
    param_slider1.on_changed(update)
    param_slider2.on_changed(update)
    param_slider3.on_changed(update)

    # Create a `matplotlib.widgets.Button` to reset the sliders to initial values.
    resetax = plt.axes([0.8, 0.025, 0.1, 0.04])
    button = Button(resetax, 'Reset', hovercolor='0.975')


    def reset(event):
        param_slider1.reset()
        param_slider2.reset()
        param_slider3.reset()

    button.on_clicked(reset)



    plt.show()

remove_redundancy(x_train, y_train, tol=1e-07)

extract unique columns of x_train (and corresponding elements in y_train)

Author: Audun Skau Hansen

Source code in btjenesten/analysis.py
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
def remove_redundancy(x_train, y_train, tol = 10e-8):
    """
    extract unique columns of x_train (and corresponding elements in y_train)

    Author: Audun Skau Hansen

    """


    ns = x_train.shape[0] #number of measurements

    # compute the "euclidean distance"
    d = np.sum((x_train[:, None] - x_train[None, :])**2, axis = 2)



    active = np.ones(ns, dtype = bool)


    unique_training_x = []
    unique_training_y = []

    for i in range(ns):

        distances = d[i]

        da = distances[active]
        ia = np.arange(ns)[active]

        elms = ia[da<tol]
        active[elms] = False

        if len(elms)>0:
            unique_training_x.append(x_train[elms[0]])
            unique_training_y.append(np.mean(y_train[elms], axis = 0))


    return np.array(unique_training_x), np.array(unique_training_y)

show_3d_sample_box(all_x)

a "hack" for showing the measurement setup in 3D using the bubblebox module / evince Author: Audun

Source code in btjenesten/analysis.py
171
172
173
174
175
176
177
178
179
180
181
def show_3d_sample_box(all_x):
    """
    a "hack" for showing the measurement setup in 3D using the bubblebox module / evince
    Author: Audun
    """
    import bubblebox as bb
    b = bb.mdbox(n_bubbles = all_x.shape[0], size = (4,4,4))
    pos = all_x - np.min(all_x, axis = 0)[None, :]
    pos = pos*np.max(pos, axis = 0)[None, :]**-1
    b.pos = 8*(pos.T - np.ones(3)[:, None]*.5)
    return b