From 398d1a9ef89af0447c3a4c9d591eb32f23b4a771 Mon Sep 17 00:00:00 2001
From: John Blischak
Date: Tue, 22 Mar 2022 15:06:22 -0400
Subject: [PATCH] Remove deprecation warnings from numpy 1.20.0

https://numpy.org/devdocs/release/1.20.0-notes.html#deprecations
---
 compute_ldscores_from_ld.py |  6 +++---
 finemapper.py               | 24 ++++++++++++------------
 ldsc_polyfun/jackknife.py   |  2 +-
 ldsc_polyfun/sumstats.py    |  2 +-
 munge_polyfun_sumstats.py   |  8 ++++----
 polyfun.py                  |  4 ++--
 polyloc.py                  |  6 +++---
 polypred.py                 |  6 +++---
 8 files changed, 29 insertions(+), 29 deletions(-)

diff --git a/compute_ldscores_from_ld.py b/compute_ldscores_from_ld.py
index 94574a2..f3f941c 100644
--- a/compute_ldscores_from_ld.py
+++ b/compute_ldscores_from_ld.py
@@ -87,8 +87,8 @@ def load_ld_npz(ld_dir, ld_prefix):
 def get_bcor_meta(bcor_obj):
     df_ld_snps = bcor_obj.getMeta()
     df_ld_snps.rename(columns={'rsid':'SNP', 'position':'BP', 'chromosome':'CHR', 'allele1':'A1', 'allele2':'A2'}, inplace=True, errors='raise')
-    df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int)
-    df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int)
+    df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int64)
+    df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int64)
     df_ld_snps = set_snpid_index(df_ld_snps)
     return df_ld_snps
 
@@ -215,7 +215,7 @@ def compute_ldscores_chr(df_annot_chr, ld_dir=None, use_ukb=False, n=None, ld_fi
 
     #check if the data is binary
     df_annot_chr_raw = df_annot_chr.drop(columns=META_COLUMNS, errors='raise')
-    if np.all(df_annot_chr_raw.dtypes == np.bool):
+    if np.all(df_annot_chr_raw.dtypes == bool):
         is_binary = True
     elif np.all([len(np.unique(df_annot_chr_raw[c]))<=2 for c in df_annot_chr_raw.columns]):
         is_binary = True
diff --git a/finemapper.py b/finemapper.py
index ee0a90f..cb81b6d 100644
--- a/finemapper.py
+++ b/finemapper.py
@@ -85,8 +85,8 @@ def load_ld_npz(ld_prefix):
 def get_bcor_meta(bcor_obj):
     df_ld_snps = bcor_obj.getMeta()
     df_ld_snps.rename(columns={'rsid':'SNP', 'position':'BP', 'chromosome':'CHR', 'allele1':'A1', 'allele2':'A2'}, inplace=True, errors='raise')
-    ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int)
-    df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int)
+    ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int64)
+    df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int64)
     return df_ld_snps
 
 
@@ -260,7 +260,7 @@ def sync_ld_sumstats(self, ld_arr, df_ld_snps, allow_missing=False):
 
         df_ld_snps = set_snpid_index(df_ld_snps, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles)
         if ld_arr is None:
-            df_ld = pd.DataFrame(np.zeros(len(df_ld_snps.index), dtype=np.int), index=df_ld_snps.index, columns=['dummy'])
+            df_ld = pd.DataFrame(np.zeros(len(df_ld_snps.index), dtype=np.int64), index=df_ld_snps.index, columns=['dummy'])
         else:
             assert ld_arr.shape[0] == df_ld_snps.shape[0]
             assert ld_arr.shape[0] == ld_arr.shape[1]
@@ -352,8 +352,8 @@ def find_cached_ld_file(self, locus_start, locus_end, need_bcor=False):
             df_ld_snps = bcor_obj.getMeta()
             del bcor_obj
             df_ld_snps.rename(columns={'rsid':'SNP', 'position':'BP', 'chromosome':'CHR', 'allele1':'A1', 'allele2':'A2'}, inplace=True, errors='raise')
-            ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int)
-            df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int)
+            ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int64)
+            df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int64)
         else:
             raise IOError('unknown file extension')
         df_ld_snps = set_snpid_index(df_ld_snps, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles)
@@ -495,7 +495,7 @@ def compute_ld_plink(self, locus_start, locus_end, verbose):
         df_bim.rename(columns={'snp':'SNP', 'pos':'BP', 'chrom':'CHR', 'a0':'A2', 'a1':'A1'}, inplace=True)
         df_bim['A1'] = df_bim['A1'].astype('str')
         df_bim['A2'] = df_bim['A2'].astype('str')
-        df_bim['CHR'] = df_bim['CHR'].astype(np.int)
+        df_bim['CHR'] = df_bim['CHR'].astype(np.int64)
         del df_bim['i']
         del df_bim['cm']
         bed = bed.T
@@ -512,10 +512,10 @@ def compute_ld_plink(self, locus_start, locus_end, verbose):
             mem_limit = 1
         else:
             mem_limit = self.memory
-        chunk_size = np.int((np.float(mem_limit) * 0.8) / bed.shape[0] / 4 * (2**30))
+        chunk_size = np.int64((np.float64(mem_limit) * 0.8) / bed.shape[0] / 4 * (2**30))
         if chunk_size==0: chunk_size=1
         if chunk_size > bed.shape[1]: chunk_size = bed.shape[1]
-        num_chunks = np.int(np.ceil(bed.shape[1] / chunk_size))
+        num_chunks = np.int64(np.ceil(bed.shape[1] / chunk_size))
         if num_chunks>1:
             assert chunk_size * (num_chunks-2) < bed.shape[1]-1
         if chunk_size * (num_chunks-1) >= bed.shape[1]:
@@ -893,13 +893,13 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob
         df_susie['DISTANCE_FROM_CENTER'] = np.abs(df_susie['BP'] - middle)
 
         #mark causal sets
-        self.susie_dict = {key:np.array(susie_obj.rx2(key), dtype=np.object) for key in list(susie_obj.names)}
+        self.susie_dict = {key:np.array(susie_obj.rx2(key), dtype=object) for key in list(susie_obj.names)}
         df_susie['CREDIBLE_SET'] = 0
         susie_sets = self.susie_dict['sets'][0]
         #if type(susie_sets) != self.RNULLType:
         try:
             for set_i, susie_set in enumerate(susie_sets):
-                is_in_set = np.zeros(df_susie.shape[0], dtype=np.bool)
+                is_in_set = np.zeros(df_susie.shape[0], dtype=bool)
                 is_in_set[np.array(susie_set)-1] = True
                 is_in_set[df_susie['CREDIBLE_SET']>0] = False
                 df_susie.loc[is_in_set, 'CREDIBLE_SET'] = set_i+1
@@ -979,7 +979,7 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob
             if ld_file is not None:
                 raise ValueError('cannot specify an ld file when assuming a single causal SNP per locus')
             ld_file = finemap_output_prefix+'.ld'
-            np.savetxt(ld_file, np.eye(self.df_sumstats_locus.shape[0], dtype=np.int), fmt='%s')
+            np.savetxt(ld_file, np.eye(self.df_sumstats_locus.shape[0], dtype=np.int64), fmt='%s')
         else:
             if ld_file is None:
                 ld_data = self.get_ld_data(locus_start, locus_end, need_bcor=True, verbose=verbose)
@@ -1016,7 +1016,7 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob
 
         #flip some of the alleles
         if num_causal_snps == 1:
-            is_flipped = np.zeros(self.df_sumstats_locus.shape[0], dtype=np.bool)
+            is_flipped = np.zeros(self.df_sumstats_locus.shape[0], dtype=bool)
         else:
             if ld_file.endswith('.bcor'):
                 bcor_obj = bcor(ld_file)
diff --git a/ldsc_polyfun/jackknife.py b/ldsc_polyfun/jackknife.py
index 937ce9d..fd1e8fb 100644
--- a/ldsc_polyfun/jackknife.py
+++ b/ldsc_polyfun/jackknife.py
@@ -705,7 +705,7 @@ def __init__(self, x, y, n_blocks=None, separators=None, chr_num=None, verbose=T
 
     def _divide_chromosomes_to_sets(self, chr_sizes, num_sets):
         chr_order = np.argsort(chr_sizes)[::-1] #np.arange(len(chr_sizes))
-        chr_assignments = np.zeros(22, dtype=np.int) - 1
+        chr_assignments = np.zeros(22, dtype=np.int64) - 1
         chr_assignments[chr_order[:num_sets]] = np.arange(num_sets)
         set_sizes = chr_sizes[chr_order[:num_sets]].copy()
         for c_i in chr_order[num_sets : len(chr_sizes)]:
diff --git a/ldsc_polyfun/sumstats.py b/ldsc_polyfun/sumstats.py
index ed5e7c4..7c3939b 100644
--- a/ldsc_polyfun/sumstats.py
+++ b/ldsc_polyfun/sumstats.py
@@ -255,7 +255,7 @@ def _read_ld_sumstats(args, log, fh, alleles=True, dropna=True):
 
    #keep only requested annotations if --anno was specified
    if args.anno is not None:
-        cols_to_keep = np.zeros(len(ref_ld.columns), dtype=np.bool)
+        cols_to_keep = np.zeros(len(ref_ld.columns), dtype=bool)
        annotations = args.anno.split(',')
        is_found1 = np.isin(annotations, ref_ld.columns.str[:-2])
        is_found2 = np.isin(annotations, ref_ld.columns.str[:-4])
diff --git a/munge_polyfun_sumstats.py b/munge_polyfun_sumstats.py
index fef1cf6..f779912 100644
--- a/munge_polyfun_sumstats.py
+++ b/munge_polyfun_sumstats.py
@@ -21,7 +21,7 @@ def find_df_column(df, strings_to_find, allow_missing=False):
 
     if isinstance(strings_to_find, str):
         strings_to_find = [strings_to_find]
-    is_relevant_col = np.zeros(df.shape[1], dtype=np.bool)
+    is_relevant_col = np.zeros(df.shape[1], dtype=bool)
     for str_to_find in strings_to_find:
         is_relevant_col = is_relevant_col | (df.columns.str.upper() == str_to_find.upper())
     if np.sum(is_relevant_col)==0:
@@ -110,7 +110,7 @@ def compute_z(df_sumstats):
 
 def filter_sumstats(df_sumstats, min_info_score=None, min_maf=None, remove_strand_ambig=False, keep_hla=False):
     logging.info('%d SNPs are in the sumstats file'%(df_sumstats.shape[0]))
-    is_good_snp = np.ones(df_sumstats.shape[0], dtype=np.bool)
+    is_good_snp = np.ones(df_sumstats.shape[0], dtype=bool)
 
     #remove 'bad' BOLT-LMM SNPs
     if 'CHISQ_BOLT_LMM' in df_sumstats.columns:
@@ -142,7 +142,7 @@ def filter_sumstats(df_sumstats, min_info_score=None, min_maf=None, remove_stran
 
     #find strand ambiguous summary statistics
    if remove_strand_ambig:
-        is_strand_ambig = np.zeros(df_sumstats.shape[0], dtype=np.bool)
+        is_strand_ambig = np.zeros(df_sumstats.shape[0], dtype=bool)
         for ambig_pairs in [('A', 'T'), ('T', 'A'), ('C', 'G'), ('G', 'C')]:
             is_strand_ambig = is_strand_ambig | ((df_sumstats['A2']==ambig_pairs[0]) & (df_sumstats['A1']==ambig_pairs[1]))
         is_good_snp = is_good_snp & (~is_strand_ambig)
@@ -171,7 +171,7 @@ def filter_sumstats(df_sumstats, min_info_score=None, min_maf=None, remove_stran
 
 def compute_casecontrol_neff(df_sumstats):
     logging.info('Computing the effective sample size for case-control data...')
-    Neff = (4.0 / (1.0/df_sumstats['N_CASES'] + 1.0/df_sumstats['N_CONTROLS'])).astype(np.int)
+    Neff = (4.0 / (1.0/df_sumstats['N_CASES'] + 1.0/df_sumstats['N_CONTROLS'])).astype(np.int64)
     return Neff
 
 
diff --git a/polyfun.py b/polyfun.py
index d9b6137..fe56915 100644
--- a/polyfun.py
+++ b/polyfun.py
@@ -405,7 +405,7 @@ def create_df_bins(self, bin_sizes, df_snpvar, df_snpvar_sorted=None, min_bin_si
         ind=0
         df_bins = pd.DataFrame(index=df_snpvar_sorted.index)
         for bin_i, bin_size in enumerate(bin_sizes):
-            snpvar_bin = np.zeros(df_bins.shape[0], dtype=np.bool)
+            snpvar_bin = np.zeros(df_bins.shape[0], dtype=bool)
             snpvar_bin[ind : ind+bin_size] = True
             df_bins['snpvar_bin%d'%(len(bin_sizes) - bin_i)] = snpvar_bin
             ind += bin_size
@@ -462,7 +462,7 @@ def partition_snps_Ckmedian(self, args, use_ridge):
             seg_obj = median_seg_func(df_snpvar_sorted.values, k=np.array([5,30]))
         else:
             seg_obj = median_seg_func(df_snpvar_sorted.values, k=args.num_bins)
-        bin_sizes = np.array(seg_obj.rx2('size')).astype(np.int)
+        bin_sizes = np.array(seg_obj.rx2('size')).astype(np.int64)
         num_bins = len(bin_sizes)
         logging.info('Ckmedian.1d.dp partitioned SNPs into %d bins'%(num_bins))
 
diff --git a/polyloc.py b/polyloc.py
index 6056110..ff3059d 100644
--- a/polyloc.py
+++ b/polyloc.py
@@ -204,16 +204,16 @@ def compute_Mp(self, p, cumsum_prop_h2, cumnum_binsize):
 
         num_jk = cumsum_prop_h2.shape[1]
         last_bin_index = np.argmax(cumsum_prop_h2 >= p, axis=0)
-        num_snps_bin1 = np.zeros(num_jk, dtype=np.int)
+        num_snps_bin1 = np.zeros(num_jk, dtype=np.int64)
         h2_bin1 = np.zeros(num_jk)
         num_snps_bin1[last_bin_index != 0] = cumnum_binsize[last_bin_index[last_bin_index != 0] - 1]
         h2_bin1[last_bin_index != 0] = cumsum_prop_h2[last_bin_index[last_bin_index != 0] - 1, np.arange(num_jk)[last_bin_index != 0]]
 
         num_snps_bin2 = cumnum_binsize[last_bin_index]
         h2_bin2 = cumsum_prop_h2[last_bin_index, np.arange(num_jk)]
-        slope = (num_snps_bin2-num_snps_bin1).astype(np.float) / (h2_bin2-h2_bin1)
+        slope = (num_snps_bin2-num_snps_bin1).astype(np.float64) / (h2_bin2-h2_bin1)
         assert not np.any(np.isnan(slope))
-        Mp = np.ceil(num_snps_bin1 + slope * (p - h2_bin1)).astype(np.int)
+        Mp = np.ceil(num_snps_bin1 + slope * (p - h2_bin1)).astype(np.int64)
         return Mp
 
diff --git a/polypred.py b/polypred.py
index e5df7be..f0ea9f3 100644
--- a/polypred.py
+++ b/polypred.py
@@ -47,7 +47,7 @@ def create_plink_range_file(df_betas, temp_dir, num_jk=200):
     scores_file = os.path.join(temp_dir, 'snp_scores.txt')
     separators = np.floor(np.linspace(0, df_betas.shape[0], num_jk+1)).astype(int)
     df_betas['score'] = 0
-    is_in_range = np.zeros(df_betas.shape[0], dtype=np.bool)
+    is_in_range = np.zeros(df_betas.shape[0], dtype=bool)
     for i in range(len(separators)-1):
         is_in_range[separators[i] : separators[i+1]] = True
         df_betas.loc[is_in_range, 'score'] = i+1.5
@@ -176,7 +176,7 @@ def load_betas_files(betas_file, verbose=True):
     df_betas.rename(columns={'sid':'SNP', 'nt1':'A1', 'nt2':'A2', 'BETA_MEAN':'BETA', 'ldpred_inf_beta':'BETA', 'chrom':'CHR', 'Chrom':'CHR', 'pos':'BP'}, inplace=True, errors='ignore')
     if not is_numeric_dtype(df_betas['CHR']):
         if df_betas['CHR'].str.startswith('chrom_').all():
-            df_betas['CHR'] = df_betas['CHR'].str[6:].astype(np.int)
+            df_betas['CHR'] = df_betas['CHR'].str[6:].astype(np.int64)
         else:
             raise ValueError('unknown CHR format')
     df_betas.rename(columns={'BETA_joint':'BETA', 'ALLELE1':'A1', 'ALLELE0':'A2', 'beta_mean':'BETA', 'MAF_BOLT':'A1Frq', 'Name':'SNP', 'A1Effect':'BETA', 'Name':'SNP', 'Chrom':'CHR', 'Position':'BP', 'beta':'BETA'}, inplace=True, errors='ignore')
@@ -284,7 +284,7 @@ def estimate_mixing_weights(args):
         float(df_pheno['PHENO'].iloc[0])
     except:
         df_pheno = df_pheno.iloc[1:]
-    df_pheno['PHENO'] = df_pheno['PHENO'].astype(np.float)
+    df_pheno['PHENO'] = df_pheno['PHENO'].astype(np.float64)
     if np.any(df_pheno.index.duplicated()):
         raise ValueError('duplicate ids found in %s'%(args.pheno))
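
Note: in numpy 1.20 the aliases np.int, np.float, np.bool, and np.object became deprecated names for the Python builtins int, float, bool, and object, and they were removed outright in numpy 1.24. The sketch below is illustrative only, not part of the diff, and assumes a numpy version >= 1.20 and < 1.24 is installed; it reproduces the warning this patch silences and the replacements it applies. Because np.int was simply the builtin int (a platform-dependent C long), switching to the explicit np.int64 also pins a width that was previously 32-bit on Windows.

    # Illustrative sketch, not part of the patch.
    # Assumes numpy >= 1.20 (the aliases warn) and < 1.24 (they still exist).
    import warnings
    import numpy as np

    with warnings.catch_warnings(record=True) as caught:
        warnings.simplefilter('always')
        np.zeros(3, dtype=np.int)  # merely accessing np.int emits a DeprecationWarning
    assert any(issubclass(w.category, DeprecationWarning) for w in caught)

    # The warning-free replacements used throughout this patch:
    np.zeros(3, dtype=np.int64)    # explicit 64-bit width instead of np.int
    np.zeros(3, dtype=np.float64)  # explicit width instead of np.float
    np.zeros(3, dtype=bool)        # builtin bool instead of np.bool
    np.empty(3, dtype=object)      # builtin object instead of np.object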