# 积分-排名曲线

 1 #! /usr/bin/gnuplot
2 set terminal png size 1080,720   #建立空白图片
3 set title usr.": score (".y1range.") rank (".y2range.")"  #注明曲线图标题
4 set output "fit.png"   #设置文件名
5 set key left
6 set grid
7
8 set xlabel "score"
9 set ylabel "rank"
10
11 #plot "score.txt" using 1:2 with lp pt 13 title "score" axis x1y1, "score.txt" using 1:3 with lp pt 3 title "rank" axis x1y2
12 plot "score.txt" using 2:3 with lp pt 13 title "score-rank"
13
14 quit   #退出软件

# 数据拟合

f1(x)=a*x^2+b*x+c
f2(x)=f/x+g
f3(x)=j*atan(x)+k
f4(x)=m*log(x)+n

 1 #! /usr/bin/gnuplot
2 set terminal png size 1080,720   #建立空白图片
3 set title usr.": score (".y1range.") rank (".y2range.")"  #注明曲线图标题
4 set output "fit.png"   #设置文件名
5 set key left
6 set grid
7
8 set xlabel "score"
9 set ylabel "rank"
10
11 y1(x)=a*x**2+b*x+c
12 fit y1(x) "score.txt" using 2:3 via a,b,c
13
14 y2(x)=f/x+g
15 fit y2(x) "score.txt" using 2:3 via f,g
16
17 y3(x)=j*atan(x)+k
18 fit y3(x) "score.txt" using 2:3 via j,k
19
20 y4(x)=m*log(x)+n
21 fit y4(x) "score.txt" using 2:3 via m,n
22
23 plot "score.txt" using 2:3 with lp pt 13 title "score-rank", \
24     y1(x) with l lw 2 lt 2 title "f(x)=ax^2+bx+c", \
25     y2(x) with l lw 3 lt 3 title "f(x)=a/x+b", \
26     y3(x) with l lw 1 lt 4 title "f(x)=a*atan(x)+b", \
27     y4(x) with l lw 2 lt 5 title "f(x)=a*log(x)+b"
28
29 quit   #退出软件

line 11-21 定义了各个拟合函数，line 24-27 增加了拟合曲线的绘制。如果能将拟合后的函数参数标识出来，就更好了，其实也不难，因为 a/b/c/f/g/j/k/m/n 这些参数在 gnuplot 脚本中就可以直接访问，只需要在图例显示处增加一些代码就可以了：

plot "score.txt" using 2:3 with lp pt 13 title "score-rank", \
y1(x) with l lw 6 lt 2 title sprintf("f1(x)=%.8fx^2%+fx%+.0f",a,b,c), \
y2(x) with l lw 5 lt 3 title sprintf("f2(x)=%.2f/x%+.0f",f,g), \
y3(x) with l lw 2 lt 4 title sprintf("f3(x)=%.2fatan(x)%+.0f",j,k), \
y4(x) with l lw 2 lt 5 title sprintf("f4(x)=%.2flog(x)%+.0f",m,n)

# 数据预测

 1 #! /usr/bin/gnuplot
2 set terminal png size 1080,720   #建立空白图片
3 set title usr.": score (".y1range.") rank (".y2range.")"  #注明曲线图标题
4 set output "fit.png"   #设置文件名
5 set key left
6 set grid
7
8 set xlabel "score"
9 set ylabel "rank"
10
11 y1(x)=a*x**2+b*x+c
12 fit y1(x) "score.txt" using 2:3 via a,b,c
13
14 y2(x)=f/x+g
15 fit y2(x) "score.txt" using 2:3 via f,g
16
17 y3(x)=m*log(x)+n
18 fit y3(x) "score.txt" using 2:3 via m,n
19
20 xval=40000
21 y1val=a*xval**2+b*xval+c
22 y2val=f/xval+g
23 y3val=m*log(xval)+n
24
25 set label 1 sprintf("f1(%.0f)=%.0f",xval,y1val) at graph 0.6,0.7 left
26 set label 2 sprintf("f2(%.0f)=%.0f",xval,y2val) at graph 0.6,0.65 left
27 set label 3 sprintf("f3(%.0f)=%.0f",xval,y3val) at graph 0.6,0.6 left
28
29 plot "score.txt" using 2:3 with lp pt 13 title "score-rank", \
30     y1(x) with l lw 4 lt 2 title sprintf("f1(x)=%.8fx^2%+fx%+.0f",a,b,c), \
31     y2(x) with l lw 3 lt 3 title sprintf("f2(x)=%.2f/x%+.0f",f,g), \
32     y3(x) with l lw 2 lt rgb "red" title sprintf("f3(x)=%.2flog(x)%+.0f",m,n)
33
34
35 quit   #退出软件

# 绘制预测曲线

## 输出预测值

*******************************************************************************
Mon Jul  5 14:22:31 2021

FIT:    data read from "score.txt" using 2:3
format = x:z
#datapoints = 365
residuals are weighted equally (unit weight)

function used for fitting: y1(x)
y1(x)=a*x**2+b*x+c
fitted parameters initialized with current variable values

iter      chisq       delta/lim  lambda   a             b             c
0 1.2882895936e+19   0.00e+00  1.08e+08    1.000000e+00   1.000000e+00   1.000000e+00
11 6.0109804999e+08  -9.02e-01  1.08e-03    1.991252e-04  -8.363164e+00   1.465348e+05

After 11 iterations the fit converged.
final sum of squares of residuals : 6.01098e+08
rel. change during last iteration : -9.01929e-06

degrees of freedom    (FIT_NDF)                        : 362
rms of residuals      (FIT_STDFIT) = sqrt(WSSR/ndf)    : 1288.6
variance of residuals (reduced chisquare) = WSSR/ndf   : 1.66049e+06

Final set of parameters            Asymptotic Standard Error
=======================            ==========================
a               = 0.000199125      +/- 3.57e-06     (1.793%)
b               = -8.36316         +/- 0.08565      (1.024%)
c               = 146535           +/- 456.8        (0.3117%)

correlation matrix of the fit parameters:
a      b      c
a               1.000
b              -0.986  1.000
c               0.921 -0.969  1.000

*******************************************************************************
Mon Jul  5 14:22:31 2021

FIT:    data read from "score.txt" using 2:3
format = x:z
#datapoints = 365
residuals are weighted equally (unit weight)

function used for fitting: y2(x)
y2(x)=f/x+g
fitted parameters initialized with current variable values

iter      chisq       delta/lim  lambda   f             g
0 2.5365572521e+12   0.00e+00  7.07e-01    1.000000e+00   1.000000e+00
7 2.5241195880e+09  -4.95e-08  7.07e-08    3.478261e+08   4.432964e+04

After 7 iterations the fit converged.
final sum of squares of residuals : 2.52412e+09
rel. change during last iteration : -4.94761e-13

degrees of freedom    (FIT_NDF)                        : 363
rms of residuals      (FIT_STDFIT) = sqrt(WSSR/ndf)    : 2636.95
variance of residuals (reduced chisquare) = WSSR/ndf   : 6.9535e+06

Final set of parameters            Asymptotic Standard Error
=======================            ==========================
f               = 3.47826e+08      +/- 2.776e+06    (0.7982%)
g               = 44329.6          +/- 327.3        (0.7383%)

correlation matrix of the fit parameters:
f      g
f               1.000
g              -0.907  1.000

*******************************************************************************
Mon Jul  5 14:22:31 2021

FIT:    data read from "score.txt" using 2:3
format = x:z
#datapoints = 365
residuals are weighted equally (unit weight)

function used for fitting: y3(x)
y3(x)=m*log(x)+n
fitted parameters initialized with current variable values

iter      chisq       delta/lim  lambda   m             n
0 2.5360128666e+12   0.00e+00  6.58e+00    1.000000e+00   1.000000e+00
5 2.4184679129e+08  -5.46e-07  6.58e-05   -3.918657e+04   4.438066e+05

After 5 iterations the fit converged.
final sum of squares of residuals : 2.41847e+08
rel. change during last iteration : -5.46492e-12

degrees of freedom    (FIT_NDF)                        : 363
rms of residuals      (FIT_STDFIT) = sqrt(WSSR/ndf)    : 816.238
variance of residuals (reduced chisquare) = WSSR/ndf   : 666245

Final set of parameters            Asymptotic Standard Error
=======================            ==========================
m               = -39186.6         +/- 95.82        (0.2445%)
n               = 443807           +/- 886.9        (0.1998%)

correlation matrix of the fit parameters:
m      n
m               1.000
n              -0.999  1.000


sed -n '/[abcfgmn] *=.*/p' fit.log

a               = 0.000199125      +/- 3.57e-06     (1.793%)
b               = -8.36316         +/- 0.08565      (1.024%)
c               = 146535           +/- 456.8        (0.3117%)
f               = 3.47826e+08      +/- 2.776e+06    (0.7982%)
g               = 44329.6          +/- 327.3        (0.7383%)
m               = -39186.6         +/- 95.82        (0.2445%)
n               = 443807           +/- 886.9        (0.1998%)


sed -n '/[abcfgmn] *=.*/p' fit.log  | awk '{print $1,$2,$3+0}'

注意第三列使用 "$3+0" 的 trick 来保证提取的是浮点数据：

a = 0.000199125
b = -8.36316
c = 146535
f = 347826000
g = 44329.6
m = -39186.6
n = 443807


eval $(sed -n '/[abcfgmn] *=.*/p' fit.log | awk '{print$1,$2,$3+0}' | sed 's/ //g')

awk -v a=$a -v b=$b -v c=$c -v f=$f -v g=$g -v m=$m -v n=$n -v xval=$xval 'BEGIN { print "y1="int(a*xval*xval+b*xval+c+0.5); print "y2="int(f/xval+g+0.5); print "y3="int(m*log(xval)+n+0.5) }'

y1=130609
y2=53025
y3=28561


 1 #! /bin/sh
2 usr=$(cat user.txt)
3 firstline=$(cat ./score.txt | head -1)
4 y1min=$(echo $firstline | awk '{ print $2 }')
5 y2max=$(echo $firstline | awk '{ print $3 }')
6 lastline=$(cat ./score.txt | tail -1)
7 y1max=$(echo $lastline | awk '{ print $2 }')
8 y2min=$(echo $lastline | awk '{ print $3 }')
9 echo "y1 range [$y1min,$y1max], y2 range [$y2max,$y2min]"
10 gnuplot -e "usr='$usr'" -e "y1range='$(($y1max-$y1min))'" -e "y2range='$(($y2max-$y2min))'" ./draw.plt
11
12 # clear log to get generated values
13 rm fit.log 2>/dev/null
14 gnuplot -e "usr='$usr'" -e "y1max='$y1max'" -e "y2min='$y2min'" ./fit.plt
15
16 if [ $# -gt 0 ]; then
17     # don't know how to print out parameter (a,b,c,f,g,m,n) from gnuplot script, so here extract them from fit.log (that's why we clear fit.log before calling fit.plt)
18     eval $(sed -n '/[abcfgmn] *=.*/p' fit.log | awk '{print $1,$2,$3+0}' | sed 's/ //g')
19     # after that line, a/b/c/f/g/m/n takes effect, now calculate predicating values
20
21     # predicate x*2, round result to integer
22     xval=$(($y1max*2))
23     eval $(awk -v a=$a -v b=$b -v c=$c -v f=$f -v g=$g -v m=$m -v n=$n -v xval=$xval 'BEGIN { print "y1="int(a*xval*xval+b*xval+c+0.5); print "y2="int(f/xval+g+0.5); print "y3="int(m*log(xval)+n+0.5) }')
24
25     # dump results to data files
26     echo "$xval $y1" >> predicate_binomial.data
27     echo "$xval $y2" >> predicate_reciprocal.data
28     echo "$xval $y3" >> predicate_logarithm.data
29 fi
30
31 # for centos
32 type eog > /dev/null 2>&1
33 if [ $? -eq 0 ]; then
34     eog draw.png &
35     eog fit.png &
36     exit 0
37 fi
38
39 # for mac
40 type open > /dev/null 2>&1
41 if [ $? -eq 0 ]; then
42     open draw.png &
43     open fit.png &
44     exit 0
45 fi
46
47 # for windows msys2
48 type mspaint > /dev/null 2>&1
49 if [ $? -eq 0 ]; then
50     mspaint draw.png &
51     mspaint fit.png &
52     exit 0
53 fi
54
55 exit 1

## 历史补算

 1 #! /bin/sh
2 # data should be parted into score.txt (with some basic historical data)
3 # and more.txt (with more recent data),
4 # and then we will repair predicating data by moving more.txt data
5 # into score.txt line by line and calling plot.sh without call png starter...
6
7 if [ ! -f "more.txt" ]; then
8     echo "you should split score.txt to score.txt & more.txt first before run this scripts..."
9     exit 1
10 fi
11
12 while read line
13 do
14     echo "repair line $line"
15     echo "$line" >> score.txt
16     ./plot.sh "update_predicating_data"
17 done < more.txt
18
19 echo "repair done, now submit predicat_*.data !"
20 rm more.txt

 1 if [ $# -gt 0 ]; then
2     # don't know how to print out parameter (a,b,c,f,g,m,n) from gnuplot script, so here extract them from fit.log (that's why we clear fit.log before calling fit.plt)
3     eval $(sed -n '/[abcfgmn] *=.*/p' fit.log  | awk '{print $1,$2,$3+0}' | sed 's/ //g')
4     # after that line, a/b/c/f/g/m/n takes effect, now calculate predicating values
5
6     # predicate x*2, round result to integer
7     xval=$(($y1max*2))
8     eval $(awk -v a=$a -v b=$b -v c=$c -v f=$f -v g=$g -v m=$m -v n=$n -v xval=$xval 'BEGIN { print "y1="int(a*xval*xval+b*xval+c+0.5); print "y2="int(f/xval+g+0.5); print "y3="int(m*log(xval)+n+0.5) }')
9
10     # dump results to data files
11     echo "$xval $y1" >> predicate_binomial.data
12     echo "$xval $y2" >> predicate_reciprocal.data
13     echo "$xval $y3" >> predicate_logarithm.data
14 else
15     # for centos
16     type eog > /dev/null 2>&1
17     if [ $? -eq 0 ]; then
18         eog draw.png &
19         eog fit.png &
20         exit 0
21     fi
22
23     # for mac
24     type open > /dev/null 2>&1
25     if [ $? -eq 0 ]; then
26         open draw.png &
27         open fit.png &
28         exit 0
29     fi
30
31     # for windows msys2
32     type mspaint > /dev/null 2>&1
33     if [ $? -eq 0 ]; then
34         mspaint draw.png &
35         mspaint fit.png &
36         exit 0
37     fi
38 fi
39
40 exit 1

line 14 添加了一行 else，表示只有在脚本参数个数为 0（即不带任何参数调用脚本）时才启动图片自动打开功能。

## 绘制预测线

 1 #! /usr/bin/gnuplot
2 set terminal png size 1080,720   #建立空白图片
3 set title usr.": score (".y1max.") rank (".y2min.")"  #注明曲线图标题
4 set output "fit.png"   #设置文件名
5 set key left reverse Left spacing 1.2
6 set grid
7
8 set xlabel "score"
9 set ylabel "rank"
10 # to prevent predicating value pollute our x-axis
11 set xrange [y1min-100:y1max+100]
13 set fit quiet
14
15 y1(x)=a*x**2+b*x+c
16 fit y1(x) "score.txt" using 2:3 via a,b,c
17
18 y2(x)=f/x+g
19 fit y2(x) "score.txt" using 2:3 via f,g
20
21 y3(x)=m*log(x)+n
22 fit y3(x) "score.txt" using 2:3 via m,n
23
24 xval=y1max*2
25 y1val=a*xval**2+b*xval+c
26 y2val=f/xval+g
27 y3val=m*log(xval)+n
28
29 set label 1 sprintf("f1(%.0f)=%.0f",xval,y1val) at graph 0.6,0.7 left
30 set label 2 sprintf("f2(%.0f)=%.0f",xval,y2val) at graph 0.6,0.65 left
31 set label 3 sprintf("f3(%.0f)=%.0f",xval,y3val) at graph 0.6,0.6 left
32
33 plot "score.txt" using 2:3 with lp pt 13 title "score-rank", \
34     y1(x) with l lw 4 lt 2 title sprintf("f1(x)=%.8fx^2%+fx%+.0f",a,b,c), \
35     y2(x) with l lw 3 lt 3 title sprintf("f2(x)=%.2f/x%+.0f",f,g), \
36     y3(x) with l lw 2 lt rgb "red" title sprintf("f3(x)=%.2flog(x)%+.0f",m,n), \
37     "predicate_binomial.data" using 1:2 with lp pt 12 lt 2 title "f1-pred", \
38     "predicate_reciprocal.data" using 1:2 with lp pt 11 lt 3 title "f2-pred", \
39     "predicate_logarithm.data" using 1:2 with lp pt 10 lt rgb "red" title "f3-pred"
40
41 quit   #退出软件

# 后记

https://github.com/goodpaperman/cnblogs

# 参考

[2]. awk将字符串转为数字的方法

[4]. Gnuplot重定向fit输出

[5]. gnuplot常用技巧

[7]. gnuplot使用手册

[9]. AWK 打印匹配内容之后的指定行

posted @ 2021-07-20 10:39  goodcitizen  阅读(507)  评论(2)  编辑  收藏  举报