Posts under category Psychology | 心理学

Python:样本和总体方差的比较示意图

未标题-1.jpg

绘制所用的Python代码如下:

import matplotlib.pyplot as plt
from scipy import linspace, sqrt
from scipy.special import erf
import numpy as np

color = '#339999'
color2 = '#660033'


def pdf(x, mu=0, sigma=1):
    """
    Calculates the normal distribution's probability density
    function (PDF).

    """
    term1 = 1.0 / (sqrt(2 * np.pi) * sigma)
    term2 = np.exp(-0.5 * ((x - mu) / sigma) ** 2)
    return term1 * term2


def cdf(_x):
    return (1 + erf(_x / sqrt(2))) / 2


def skew(_x, e, _w, a, z=100):
    t = (_x - e) / _w
    return z / _w * pdf(t) * cdf(a * t)


plt.figure(figsize=(20, 10))
plt.rcParams['font.sans-serif'] = ['SimHei']
plt.rcParams['axes.unicode_minus'] = False

n = 129
w = 2.5
y_text_offset = -0.5
y_title_offset = 8.1

fontsize = 24

middle_x = 0

x = linspace(middle_x - w * 3.3, middle_x + w * 3.3, n)
p = skew(x, middle_x, 3, 0)
plt.plot(x, p, 'r-', color=color, linewidth=3.0)
middle_index = list(p).index(max(p))
plt.text(x[middle_index + 12] - 0.4, max(p)-0.4, '总体', fontsize=fontsize, color=color)
plt.plot([x[middle_index], x[middle_index]], [0, p[middle_index]], '--', color=color,
         linewidth=3.0)
plt.text(x[middle_index] - 0.1, y_text_offset, '$\mu$', fontsize=fontsize, color=color)

plt.ylim([0, max(p) + 0.2])

middle_x = middle_x + 0.4
x = linspace(middle_x - w * 1.5, middle_x + w * 1.5, n)
p = skew(x, middle_x, 1.4, 0, 24)
plt.plot(x, p, 'r-', color=color2, linewidth=3.0)
plt.text(x[middle_index + 12] - 0.4, max(p), '样本', fontsize=fontsize, color=color2)
plt.plot([x[middle_index], x[middle_index]], [0, p[middle_index]], '--', color=color2,
         linewidth=3.0)
plt.text(x[middle_index] - 0.1, y_text_offset, r'$\barX$', fontsize=fontsize, color=color2)
print([min(x), max(x)])
plt.plot([min(x), max(x)], [y_text_offset, y_text_offset], 'r-', color=color2,
         linewidth=3.0)

plt.title('样本和总体的方差比较', fontsize=fontsize)

plt.yticks([])
plt.xticks([])

plt.show()

t检验与点二列相关的异同

t检验、点二列相关
两者侧重点不同,前者重在检验两列数据的均值差异,即宏观上是否来自同一总体,后者关注两列数据的相关程度。
相同点在都是一个二分数据将一列等距或等比数据分类,且两者数值可以通过公式相互转换。

转换的Python代码如下:

import math
import numpy as np


# s1 = [10, 9, 8, 8, 8, 7]
# s2 = [5, 5, 4, 4]

# s1 = [67, 73, 74, 70, 70, 75, 73, 68, 69]
# s2 = [69, 63, 67, 64, 61, 66, 60, 63, 63]

s1 = [84, 84, 88, 90, 78, 92, 94, 96, 88, 90]
s2 = [82, 76, 60, 72, 74, 76, 80, 78, 76, 74]

xbar1 = np.mean(s1)
xbar2 = np.mean(s2)


# Calculate sum of squares(和方)
def calc_ss(s):
  return sum(map(lambda x: x * x, s)) - sum(s) ** 2 / len(s)


ss1 = calc_ss(s1)
ss2 = calc_ss(s2)

xsquare1 = ss1 / (len(s1) - 1)
xsquare2 = ss2 / (len(s2) - 1)

print('Xbar1:', xbar1)
print('Xbar2:', xbar2)

print('SS1:', ss1)
print('SS2:', ss2)

print('Xsquare1:', xsquare1)
print('Xsquare2:', xsquare2)

spsquare = (ss1 + ss2) / (len(s1) + len(s2) - 2)
# 总体方差
print('SPsquare', spsquare)

se = np.sqrt( spsquare / len(s1) + spsquare / len(s2) )

print('SE:', se)


tobs = (xbar1 - xbar2) / se

print('t_obs:', tobs)

print('---------------------')

s3 = s1 + s2

sx = np.sqrt( calc_ss(s3) / len(s3) )

print('sx:', sx)

rpb = ((xbar1 - xbar2) / sx) * np.sqrt( (len(s1) / len(s3)) * (len(s2) / len(s3)) )

print('r_pb:', rpb)
print('t_obs:', tobs)
print('Fmax:', max(xsquare1, xsquare2) / min(xsquare1, xsquare2))

print(rpb ** 2, 'is r_pb^2')
print(tobs ** 2 / (tobs ** 2 + len(s1) + len(s2) - 2), 'is t^2 / (t^2 + df)')

Output:

Xbar1: 88.4
Xbar2: 74.8
SS1: 254.39999999999418
SS2: 321.59999999999854
Xsquare1: 28.26666666666602
Xsquare2: 35.73333333333317
SPsquare 31.999999999999595
SE: 2.5298221281346875
t_obs: 5.375872022286282
---------------------
sx: 8.662563131083052
r_pb: 0.7849870641173408
t_obs: 5.375872022286282
Fmax: 1.2641509433962497
0.616204690831562 is r_pb^2
0.6162046908315598 is t^2 / (t^2 + df)