|
1 | | -'''Short demonstration of Python for scientific data analysis |
2 | | -
|
3 | | -This script covers the following points: |
4 | | -* Plotting a sine wave |
5 | | -* Generating a column matrix of data |
6 | | -* Writing data to a text-file, and reading data from a text-file |
7 | | -* Waiting for a button-press to continue the program exectution |
8 | | - (Note: this does NOT work in ipython, if you run it with inline figures!) |
9 | | -* Using a dictionary, which is similar to MATLAB structures |
10 | | -* Extracting data which fulfill a certain condition |
11 | | -* Calculating the best-fit-line to noisy data |
12 | | -* Formatting text-output |
13 | | -* Waiting for a keyboard-press |
14 | | -* Calculating confidence intervals for line-fits |
15 | | -* Saving figures |
16 | | -
|
17 | | -For such a short program, the definition of a "main" function, and calling |
18 | | -it by default when the module is imported by the main program, is a bit |
19 | | -superfluous. But it shows good Python coding style. |
20 | | -''' |
21 | | - |
22 | | -# Copyright(c) 2017, Thomas Haslwanter. All rights reserved, under the CC BY-SA 4.0 International License |
23 | | - |
24 | | -# In contrast to MATLAB, you explicitly have to load the modules that you need. |
25 | | -import numpy as np |
26 | | -import matplotlib.pyplot as plt |
27 | | - |
28 | | -def main(): |
29 | | - '''Define the main function. ''' |
30 | | - |
31 | | - # Create a sine-wave |
32 | | - t = np.arange(0,10,0.1) |
33 | | - x = np.sin(t) |
34 | | - |
35 | | - # Save the data in a text-file, in column form |
36 | | - # The formatting is a bit clumsy: data are by default row variables; so to |
37 | | - # get a matrix, you stack the two rows above each other, and then transpose |
38 | | - # the matrix |
39 | | - outFile = 'test.txt' |
40 | | - np.savetxt(outFile, np.vstack([t,x]).T) |
41 | | - |
42 | | - # Read the data into a different variable |
43 | | - inData = np.loadtxt(outFile) |
44 | | - t2 = inData[:,0] # Note that Python starts at "0"! |
45 | | - x2 = inData[:,1] |
46 | | - |
47 | | - # Plot the data, and wait for the user to click |
48 | | - plt.plot(t2,x2) |
49 | | - plt.title('Hit any key to continue') |
50 | | - plt.waitforbuttonpress() |
51 | | - |
52 | | - # Generate a noisy line |
53 | | - t = np.arange(-100,100) |
54 | | - # use a Python "dictionary" for named variables |
55 | | - par = {'offset':100, 'slope':0.5, 'noiseAmp':4} |
56 | | - x = par['offset'] + par['slope']*t + par['noiseAmp']*np.random.randn(len(t)) |
57 | | - |
58 | | - # Select "late" values, i.e. with t>10 |
59 | | - xHigh = x[t>10] |
60 | | - tHigh = t[t>10] |
61 | | - |
62 | | - # Plot the "late" data |
63 | | - plt.close() |
64 | | - plt.plot(tHigh, xHigh) |
65 | | - |
66 | | - # Determine the best-fit line |
67 | | - # To do so, you have to generate a matrix with "time" in the first |
68 | | - # column, and a column of "1" in the second column: |
69 | | - xMat = np.vstack((tHigh, np.ones_like(tHigh))).T |
70 | | - slope, intercept = np.linalg.lstsq(xMat, xHigh)[0] |
71 | | - |
72 | | - # Show and plot the fit, and save it to a PNG-file with a medium resolution. |
73 | | - # The "modern" way of Python-formatting is used |
74 | | - plt.plot(tHigh, intercept + slope*tHigh, 'r') |
75 | | - plt.title('Hit any key to continue') |
76 | | - plt.savefig('linefit.png', dpi=200) |
77 | | - plt.waitforbuttonpress() |
78 | | - plt.close() |
79 | | - print(('Fit line: intercept = {0:5.3f}, and slope = {1:5.3f}'.format(intercept, slope))) |
80 | | - |
81 | | - # If you want to know confidence intervals, best switch to "pandas" |
82 | | - # Note that this is an advanced topic, and requires new data structures |
83 | | - # such ad "DataFrames" and "ordinary-least-squares" or "ols-models". |
84 | | - |
85 | | - import pandas as pd |
86 | | - import statsmodels.formula.api as smf |
87 | | - |
88 | | - # Put the data into a pandas DataFrame |
89 | | - myDict = {'x':tHigh, 'y':xHigh} |
90 | | - df = pd.DataFrame(myDict) |
91 | | - |
92 | | - # Fit the model: here the "formula"-syntax commonly used in statistics is employed |
93 | | - # 'y~x' means "y is a linear function of x, taking a possible offset into consideration" |
94 | | - results = smf.ols('y~x', data=df).fit() |
95 | | - |
96 | | - # Print the results |
97 | | - print(results.summary()) |
98 | | - #raw_input('These are the summary results from Pandas - Hit any key to continue') |
99 | | - |
100 | | - |
101 | | -if __name__=='__main__': |
102 | | - main() # Execute the main function |
| 1 | +'''Short demonstration of Python for scientific data analysis |
| 2 | +
|
| 3 | +This script covers the following points: |
| 4 | +* Plotting a sine wave |
| 5 | +* Generating a column matrix of data |
| 6 | +* Writing data to a text-file, and reading data from a text-file |
| 7 | +* Waiting for a button-press to continue the program execution |
| 8 | + (Note: this does NOT work in ipython, if you run it with inline figures!) |
| 9 | +* Using a dictionary, which is similar to MATLAB structures |
| 10 | +* Extracting data which fulfill a certain condition |
| 11 | +* Calculating the best-fit-line to noisy data |
| 12 | +* Formatting text-output |
| 13 | +* Waiting for a keyboard-press |
| 14 | +* Calculating confidence intervals for line-fits |
| 15 | +* Saving figures |
| 16 | +
|
| 17 | +For such a short program, the definition of a "main" function, and calling |
| 18 | +it only when the module is run as the main program, is a bit |
| 19 | +superfluous. But it shows good Python coding style. |
| 20 | +''' |
| 21 | + |
| 22 | +# Copyright(c) 2017, Thomas Haslwanter. All rights reserved, under the CC BY-SA 4.0 International License |
| 23 | + |
| 24 | +# In contrast to MATLAB, you explicitly have to load the modules that you need. |
| 25 | +import numpy as np |
| 26 | +import matplotlib.pyplot as plt |
| 27 | + |
| 28 | +def main(): |
| 29 | + '''Define the main function. ''' |
| 30 | + |
| 31 | + # Create a sine-wave |
| 32 | + t = np.arange(0,10,0.1) |
| 33 | + x = np.sin(t) |
| 34 | + |
| 35 | + # Save the data in a text-file, in column form |
| 36 | + # The formatting is a bit clumsy: data are by default row variables; so to |
| 37 | + # get a matrix, you stack the two rows above each other, and then transpose |
| 38 | + # the matrix |
| 39 | + outFile = 'test.txt' |
| 40 | + np.savetxt(outFile, np.vstack([t,x]).T) |
| 41 | + |
| 42 | + # Read the data into a different variable |
| 43 | + inData = np.loadtxt(outFile) |
| 44 | + t2 = inData[:,0] # Note that Python starts at "0"! |
| 45 | + x2 = inData[:,1] |
| 46 | + |
| 47 | + # Plot the data, and wait for the user to click or press a key |
| 48 | + plt.plot(t2,x2) |
| 49 | + plt.title('Hit any key to continue') |
| 50 | + plt.waitforbuttonpress() |
| 51 | + |
| 52 | + # Generate a noisy line |
| 53 | + t = np.arange(-100,100) |
| 54 | + # use a Python "dictionary" for named variables |
| 55 | + par = {'offset':100, 'slope':0.5, 'noiseAmp':4} |
| 56 | + x = par['offset'] + par['slope']*t + par['noiseAmp']*np.random.randn(len(t)) |
| 57 | + |
| 58 | + # Select "late" values, i.e. with t>10 |
| 59 | + xHigh = x[t>10] |
| 60 | + tHigh = t[t>10] |
| 61 | + |
| 62 | + # Plot the "late" data |
| 63 | + plt.close() |
| 64 | + plt.plot(tHigh, xHigh) |
| 65 | + |
| 66 | + # Determine the best-fit line |
| 67 | + # To do so, you have to generate a matrix with "time" in the first |
| 68 | + # column, and a column of "1" in the second column: |
| 69 | + xMat = np.vstack((tHigh, np.ones_like(tHigh))).T |
| 70 | + slope, intercept = np.linalg.lstsq(xMat, xHigh, rcond=None)[0] |
| 71 | + |
| 72 | + # Show and plot the fit, and save it to a PNG-file with a medium resolution. |
| 73 | + # The "modern" way of Python-formatting is used |
| 74 | + plt.plot(tHigh, intercept + slope*tHigh, 'r') |
| 75 | + plt.title('Hit any key to continue') |
| 76 | + plt.savefig('linefit.png', dpi=200) |
| 77 | + plt.waitforbuttonpress() |
| 78 | + plt.close() |
| 79 | + print(('Fit line: intercept = {0:5.3f}, and slope = {1:5.3f}'.format(intercept, slope))) |
| 80 | + |
| 81 | + # If you want to know confidence intervals, best switch to "pandas" |
| 82 | + # Note that this is an advanced topic, and requires new data structures |
| 83 | + # such as "DataFrames" and "ordinary-least-squares" or "ols-models". |
| 84 | + |
| 85 | + import pandas as pd |
| 86 | + import statsmodels.formula.api as smf |
| 87 | + |
| 88 | + # Put the data into a pandas DataFrame |
| 89 | + myDict = {'x':tHigh, 'y':xHigh} |
| 90 | + df = pd.DataFrame(myDict) |
| 91 | + |
| 92 | + # Fit the model: here the "formula"-syntax commonly used in statistics is employed |
| 93 | + # 'y~x' means "y is a linear function of x, taking a possible offset into consideration" |
| 94 | + results = smf.ols('y~x', data=df).fit() |
| 95 | + |
| 96 | + # Print the results |
| 97 | + print(results.summary()) |
| 98 | + #raw_input('These are the summary results from Pandas - Hit any key to continue') |
| 99 | + |
| 100 | + |
| 101 | +if __name__=='__main__': |
| 102 | + main() # Execute the main function |
0 commit comments