项目作者: sharmasapna

项目描述 :
10 Days of Statistics Hackerrank Solutions
高级语言:
项目地址: git://github.com/sharmasapna/10-Days-of-Statistics-HackerRank-solutions.git


10-Days-of-Statistics-HackerRank-solutions

Statistics Day 0 - Mean, Median, and Mode

  from collections import Counter

  import numpy as np


  def smallest_mode(values):
      """Return the most frequent element of ``values``.

      When several elements share the highest frequency, the numerically
      smallest one is returned.
      """
      counts = Counter(values)
      top = max(counts.values())
      return min(value for value, count in counts.items() if count == top)


  def describe(values):
      """Return ``(mean, median, mode)`` for a non-empty list of numbers."""
      return np.mean(values), np.median(values), smallest_mode(values)


  def main():
      """Read a count line and a data line from stdin; print mean, median, mode."""
      int(input())  # declared element count; the data line already implies it
      data = list(map(int, input().split()))
      for statistic in describe(data):
          print(statistic)


  if __name__ == "__main__":
      main()

Statistics Day 0 - Weighted Mean

  def weighted_mean(values, weights):
      """Return the weighted mean of ``values``, rounded to one decimal place.

      ``values`` and ``weights`` are paired positionally; the result is
      sum(value * weight) / sum(weights).
      """
      weighted_total = sum(value * weight for value, weight in zip(values, weights))
      return round(weighted_total / sum(weights), 1)


  def main():
      """Read count, values and weights (one line each) and print the result."""
      int(input())  # declared element count; not needed for the computation
      values = list(map(int, input().split()))
      weights = list(map(int, input().split()))
      print(weighted_mean(values, weights))


  if __name__ == "__main__":
      main()

Day 1: Quartiles

Task
Given an array, X, of integers, calculate the respective first quartile (Q1), second quartile (Q2), and third quartile (Q3). It is guaranteed that Q1, Q2, and Q3 are integers.

  def getmedian(arr):
      """Split a sorted list around its median.

      Returns ``(lower_half, upper_half, median)``. For an odd length the
      middle element is excluded from both halves; for an even length the
      median is the average of the two central elements. A one-element list
      is returned as both halves with its single element as the median.
      """
      size = len(arr)
      if size == 1:
          return arr, arr, arr[0]
      mid = size // 2
      if size % 2 == 1:
          return arr[:mid], arr[mid + 1:], arr[mid]
      return arr[:mid], arr[mid:], (arr[mid] + arr[mid - 1]) / 2


  def quartiles(values):
      """Return ``(q1, q2, q3)`` of ``values`` using the median-of-halves rule."""
      ordered = sorted(values)
      lower, upper, q2 = getmedian(ordered)
      q1 = getmedian(lower)[2]
      q3 = getmedian(upper)[2]
      return q1, q2, q3


  def main():
      """Read a count line and a data line from stdin; print Q1, Q2, Q3 as ints."""
      int(input())  # declared element count; the data line already implies it
      data = list(map(int, input().split()))
      for quartile in quartiles(data):
          print(int(quartile))


  if __name__ == "__main__":
      main()

Day 1: Standard Deviation

  def population_std(values):
      """Return the population standard deviation of ``values``, rounded to 1 dp.

      The divisor is the number of values actually supplied, so the result
      stays correct even if a separately declared count disagrees with it.
      """
      count = len(values)
      mean = sum(values) / count
      variance = sum((value - mean) ** 2 for value in values) / count
      return round(variance ** 0.5, 1)


  def main():
      """Read a count line and a data line from stdin; print the deviation."""
      int(input())  # declared element count; the data line already implies it
      # split() without an argument tolerates repeated or trailing blanks,
      # which split(" ") would turn into empty tokens that int() rejects.
      values = list(map(int, input().split()))
      print(population_std(values))


  if __name__ == "__main__":
      main()

Day 1: Interquartile Range

  def getmedian(arr):
      """Split a sorted list around its median.

      Returns ``(lower_half, upper_half, median)``. For an odd length the
      middle element belongs to neither half; for an even length the median
      is the average of the two central elements. A one-element list is
      returned as both halves with its single element as the median.
      """
      size = len(arr)
      if size == 1:
          return arr, arr, arr[0]
      mid = size // 2
      if size % 2 == 1:
          return arr[:mid], arr[mid + 1:], arr[mid]
      return arr[:mid], arr[mid:], (arr[mid] + arr[mid - 1]) / 2


  def interquartile_range(values, frequencies):
      """Return Q3 - Q1 (as a float) of the frequency-expanded data set.

      Each ``values[i]`` is repeated ``frequencies[i]`` times before the
      quartiles are taken with the median-of-halves rule.
      """
      expanded = []
      for value, frequency in zip(values, frequencies):
          expanded.extend([value] * frequency)
      expanded.sort()
      lower, upper, _ = getmedian(expanded)
      q1 = getmedian(lower)[2]
      q3 = getmedian(upper)[2]
      return float(q3 - q1)


  def main():
      """Read count, values and frequencies from stdin; print the IQR."""
      int(input())  # declared element count; not needed for the computation
      # split() without an argument tolerates repeated or trailing blanks.
      values = list(map(int, input().split()))
      frequencies = list(map(int, input().split()))
      print(interquartile_range(values, frequencies))


  if __name__ == "__main__":
      main()

5/6
1/9

Day 2

17/42

Day 3

1/3
12/51
2/3

Day 4: Binomial Distribution I

  from math import factorial


  def binomial_pmf(k, n, p):
      """Return P(X = k) for X ~ Binomial(n, p)."""
      ways = factorial(n) // (factorial(k) * factorial(n - k))
      return ways * p ** k * (1 - p) ** (n - k)


  def prob_at_least(k_min, n, p):
      """Return P(X >= k_min) for X ~ Binomial(n, p)."""
      return sum(binomial_pmf(k, n, p) for k in range(k_min, n + 1))


  def main():
      """Read the boy:girl ratio from stdin and print P(at least 3 boys in 6)."""
      ratio = list(map(float, input().split()))
      boys, girls = ratio[0], ratio[1]
      p_boy = boys / (boys + girls)
      # 6 children, at least 3 of them boys.
      print(round(prob_at_least(3, 6, p_boy), 3))


  if __name__ == "__main__":
      main()

Day 4: Binomial Distribution II

  1. # input
  2. values = list(map(float, input().split()))
  3. p = (values[0] / 100)
  4. n = int(values[1])
  5. def fact(n):
  6. if n == 1 or n==0:
  7. return 1
  8. fact = 1
  9. for val in range(1,n+1):
  10. fact *= val
  11. return fact
  12. def binom(x,n,p):
  13. f = fact(n)/(fact(n-x)*fact(x))
  14. return f * (p**x ) * (1-p)**(n-x)
  15. # no more than 2 rejects means we have to consider {0,1,2} rejects
  16. nm = 0
  17. for i in range(3):
  18. nm += binom(i,n,p)
  19. print(round(nm,3))
  20. # atleast 2 rejects means { 2,3,4,5,6,7,8,9,10}
  21. at = 0
  22. for i in range(2,11):
  23. at += binom(i,n,p)
  24. print(round(at,3))

Day 4: Geometric Distribution I

  def gm(n, p):
      """Return the geometric probability that the first success is trial ``n``.

      ``p`` is the per-trial success probability: (1 - p)**(n - 1) * p.
      """
      failures_before = (1 - p) ** (n - 1)
      return failures_before * p


  def main():
      """Read "<numerator> <denominator>" and a trial number; print the result."""
      fraction = [float(token) for token in input().split()]
      p = fraction[0] / fraction[1]
      n = int(input())
      print(round(gm(n, p), 3))


  if __name__ == "__main__":
      main()

Day 4: Geometric Distribution II

  def gm(n, p):
      """Return the geometric probability that the first success is trial ``n``."""
      return ((1 - p) ** (n - 1)) * p


  def success_within(n, p):
      """Return the probability that the first success occurs by trial ``n``.

      This is sum(gm(i, p) for i in 1..n); the geometric series collapses to
      1 - (1 - p)**n, so no loop is needed. For n < 1 the sum is empty and
      the result is 0.
      """
      if n < 1:
          return 0
      return 1 - (1 - p) ** n


  def main():
      """Read "<numerator> <denominator>" and a trial count; print the result."""
      values = list(map(float, input().split()))
      p = values[0] / values[1]
      n = int(input())
      print(round(success_within(n, p), 3))


  if __name__ == "__main__":
      main()

Day 5: Poisson Distribution I

  import math


  def fact(n):
      """Return n! for an integer ``n``; values of ``n`` below 2 yield 1."""
      if n in (0, 1):
          return 1
      product = 1
      for factor in range(1, n + 1):
          product = product * factor
      return product


  def pois(k, lmda):
      """Return P(X = k) for X ~ Poisson(lmda)."""
      numerator = lmda ** k * math.exp(-lmda)
      return numerator / fact(k)


  def main():
      """Read the mean and the target count from stdin; print P(X = k)."""
      mean = float(input())
      k = int(input())
      print(round(pois(k, mean), 3))


  if __name__ == "__main__":
      main()

Day 5: Poisson Distribution II

  import math


  def fact(n):
      """Return n! for an integer ``n``; values of ``n`` below 2 yield 1."""
      product = 1
      for factor in range(2, n + 1):
          product *= factor
      return product


  def pois(k, lmda):
      """Return P(X = k) for X ~ Poisson(lmda)."""
      return (lmda ** k * math.exp(-lmda)) / fact(k)


  def expected_cost(base, per_square, lmda):
      """Return E[base + per_square * X**2] for X ~ Poisson(lmda).

      For a Poisson variable Var(X) = E[X] = lmda, hence
      E[X**2] = lmda + lmda**2. Using the identity gives the exact
      expectation for any mean, whereas summing the series over a fixed
      number of terms silently loses mass once lmda gets large.
      """
      return base + per_square * (lmda + lmda ** 2)


  def main():
      """Read the two machine means from one stdin line; print both costs."""
      means = list(map(float, input().split()))
      mean_a, mean_b = means[0], means[1]
      print(round(expected_cost(160, 40, mean_a), 3))
      print(round(expected_cost(128, 40, mean_b), 3))


  if __name__ == "__main__":
      main()

Day 5: Normal Distribution I

  import math


  def cum_pdf(x, mean, var):
      """Return the normal cumulative distribution function at ``x``.

      Despite the name this is the CDF, P(X <= x), of a normal distribution
      with the given ``mean`` and variance ``var``.
      """
      return 0.5 * (1 + math.erf((x - mean) / math.sqrt(2 * var)))


  def main():
      """Read "<mean> <sd>", an upper bound and a "<from> <to>" range; print both
      probabilities rounded to three decimals."""
      # split() without an argument tolerates repeated or trailing blanks.
      params = input().split()
      mean = float(params[0])
      var = float(params[1]) ** 2  # second token is the standard deviation
      less_than = float(input())
      bounds = input().split()
      from_val, to_val = float(bounds[0]), float(bounds[1])

      below = cum_pdf(less_than, mean, var)
      between = cum_pdf(to_val, mean, var) - cum_pdf(from_val, mean, var)
      print(round(below, 3))
      print(round(between, 3))


  if __name__ == "__main__":
      main()

Day 5: Normal Distribution II

  import math


  def cum_pdf(x, mean, var):
      """Return the normal CDF, P(X <= x), for the given mean and variance."""
      return 0.5 * (1 + math.erf((x - mean) / math.sqrt(2 * var)))


  def grade_percentages(mean, var, more_than, less_than):
      """Return three percentages, each rounded to two decimals.

      In order: P(X > more_than), P(X >= less_than) and P(X < less_than),
      for X normally distributed with the given mean and variance.
      """
      above_high = 1 - cum_pdf(more_than, mean, var)
      below_low = cum_pdf(less_than, mean, var)  # evaluated once, used twice
      return (
          round(above_high * 100, 2),
          round((1 - below_low) * 100, 2),
          round(below_low * 100, 2),
      )


  def main():
      """Read "<mean> <sd>" and the two thresholds from stdin; print the results."""
      params = input().split()
      mean = float(params[0])
      var = float(params[1]) ** 2  # second token is the standard deviation
      # float() rather than int(): a threshold such as "80.5" is valid input.
      more_than = float(input())
      less_than = float(input())
      for percentage in grade_percentages(mean, var, more_than, less_than):
          print(percentage)


  if __name__ == "__main__":
      main()

Day 6: The Central Limit Theorem I

  import math


  def cum_pdf(x, mean, var):
      """Return the normal CDF, P(X <= x), for the given mean and variance."""
      return 0.5 * (1 + math.erf((x - mean) / math.sqrt(2 * var)))


  def sum_cdf(limit, count, mean, sd):
      """Return P(S <= limit) where S is the sum of ``count`` i.i.d. variables.

      By the central limit theorem S is treated as normal with mean
      ``count * mean`` and variance ``count * sd**2``.
      """
      return cum_pdf(limit, count * mean, count * sd ** 2)


  def main():
      """Read max weight, box count, mean and sd (one per line); print P."""
      max_wt = float(input())
      num_boxes = float(input())
      mean = float(input())
      sd = float(input())
      print(round(sum_cdf(max_wt, num_boxes, mean, sd), 4))


  if __name__ == "__main__":
      main()

Day 6: The Central Limit Theorem II

  import math


  def cum_pdf(x, mean, var):
      """Return the normal CDF, P(X <= x), for the given mean and variance."""
      return 0.5 * (1 + math.erf((x - mean) / math.sqrt(2 * var)))


  def sum_cdf(limit, count, mean, sd):
      """Return P(S <= limit) where S is the sum of ``count`` i.i.d. variables.

      By the central limit theorem S is treated as normal with mean
      ``count * mean`` and variance ``count * sd**2``.
      """
      return cum_pdf(limit, count * mean, count * sd ** 2)


  def main():
      """Read the limit, student count, mean and sd (one per line); print P."""
      last_min = float(input())
      num_students = float(input())
      mean = float(input())
      sd = float(input())
      print(round(sum_cdf(last_min, num_students, mean, sd), 4))


  if __name__ == "__main__":
      main()

Day 6: The Central Limit Theorem III

  import math


  def confidence_interval(sample_size, mean, sd, z):
      """Return ``(low, high)``: mean -/+ z * sd / sqrt(sample_size), rounded to 2 dp."""
      margin = z * (sd / math.sqrt(sample_size))
      return round(mean - margin, 2), round(mean + margin, 2)


  def main():
      """Read sample size, mean, sd, coverage and z (one per line); print bounds."""
      sample_size = float(input())
      mean = float(input())
      sd = float(input())
      float(input())  # coverage fraction: consumed from stdin; z is given directly
      z = float(input())
      low, high = confidence_interval(sample_size, mean, sd, z)
      print(low)
      print(high)


  if __name__ == "__main__":
      main()

Day 7: Pearson Correlation Coefficient I

  import statistics


  def pearson(x, y):
      """Return the Pearson correlation coefficient of paired samples x and y.

      Computed as sum((xi - mean_x) * (yi - mean_y)) / (n * sd_x * sd_y) with
      population standard deviations; ``n`` is taken from the data itself.
      """
      mean_x, mean_y = statistics.mean(x), statistics.mean(y)
      sd_x, sd_y = statistics.pstdev(x), statistics.pstdev(y)
      co_deviation = sum((a - mean_x) * (b - mean_y) for a, b in zip(x, y))
      return co_deviation / (len(x) * sd_x * sd_y)


  def main():
      """Read count, x values and y values from stdin; print the coefficient."""
      int(input())  # declared element count; the data lines already imply it
      x = list(map(float, input().split()))
      y = list(map(float, input().split()))
      print(round(pearson(x, y), 3))


  if __name__ == "__main__":
      main()

Day 7: Spearman’s Rank Correlation Coefficient

  def ranks(values):
      """Return the 1-based ascending rank of every element of ``values``.

      The data is sorted once and turned into a lookup table, instead of
      re-sorting for each element. Equal values all receive the rank of
      their first position in the sorted order.
      """
      position = {}
      for index, value in enumerate(sorted(values), start=1):
          position.setdefault(value, index)
      return [position[value] for value in values]


  def spearman(x, y):
      """Return Spearman's rank correlation: 1 - 6 * sum(d**2) / (n * (n**2 - 1)).

      ``d`` is the per-pair rank difference. The formula presumes distinct
      values within each sample.
      """
      n = len(x)
      d_squared = sum((a - b) ** 2 for a, b in zip(ranks(x), ranks(y)))
      return 1 - 6 * d_squared / ((n ** 2 - 1) * n)


  def main():
      """Read count, x values and y values from stdin; print the coefficient."""
      int(input())  # declared element count; the data lines already imply it
      x = list(map(float, input().split()))
      y = list(map(float, input().split()))
      print(round(spearman(x, y), 3))


  if __name__ == "__main__":
      main()

Day 8: Least Square Regression Line

  def fit_line(xs, ys):
      """Return ``(intercept, slope)`` of the least-squares line y = a + b * x."""
      n = len(xs)
      sum_x, sum_y = sum(xs), sum(ys)
      sum_xx = sum(a * a for a in xs)
      sum_xy = sum(a * b for a, b in zip(xs, ys))
      slope = (n * sum_xy - sum_x * sum_y) / (n * sum_xx - sum_x ** 2)
      intercept = sum_y / n - slope * (sum_x / n)
      return intercept, slope


  def predict(xs, ys, x_new):
      """Return the fitted line's value at ``x_new``."""
      intercept, slope = fit_line(xs, ys)
      return intercept + slope * x_new


  def main():
      """Print the predicted y for x = 80 on the fixed five-point data set."""
      x = [95, 85, 80, 70, 60]
      y = [85, 95, 70, 65, 70]
      print(round(predict(x, y, 80), 3))


  if __name__ == "__main__":
      main()

Day 8: Pearson Correlation Coefficient II

-3/4

Day 9: Multiple Linear Regression

  def split_features_target(rows, n_features):
      """Split rows shaped ``[f1, ..., fn, y]`` into ``(features, targets)``.

      The first ``n_features`` entries of each row are the features and the
      entry at index ``n_features`` is the target.
      """
      features = [row[:n_features] for row in rows]
      targets = [row[n_features] for row in rows]
      return features, targets


  def fit_and_predict(features, targets, queries):
      """Fit an ordinary least-squares model and return predictions for ``queries``.

      ``model.predict`` evaluates intercept + coefficients . features, the
      same expression the manual dot-product loop spelled out.
      """
      if not queries:
          return []
      # Imported here so the pure helpers above remain importable when
      # scikit-learn is not installed.
      from sklearn import linear_model

      model = linear_model.LinearRegression()
      model.fit(features, targets)
      return [float(value) for value in model.predict(queries)]


  def main():
      """Read the training set and the query rows from stdin; print predictions.

      Input layout: a "<features> <samples>" header, one row per sample
      (features followed by the target), a query count, then one row of
      features per query.
      """
      header = input().split()
      n_features = int(header[0])
      n_samples = int(header[1])
      rows = [list(map(float, input().split())) for _ in range(n_samples)]
      features, targets = split_features_target(rows, n_features)

      n_queries = int(input())
      queries = [
          list(map(float, input().split()))[:n_features] for _ in range(n_queries)
      ]
      for prediction in fit_and_predict(features, targets, queries):
          print(round(prediction, 2))


  if __name__ == "__main__":
      main()