Remove deprecation warnings from numpy 1.20.0
jdblischak committed Mar 22, 2022
1 parent 8c9bfc7 commit 398d1a9
Showing 8 changed files with 29 additions and 29 deletions.
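
Background for the change: NumPy 1.20 deprecated its aliases for built-in Python types (`np.int`, `np.float`, `np.bool`, `np.object`), which had always been plain aliases for `int`, `float`, `bool`, and `object`. Using them started emitting a `DeprecationWarning`, and they were removed outright in NumPy 1.24. This commit replaces each alias with either the builtin or an explicit fixed-width dtype. A minimal sketch of the before/after pattern (illustrative example, not code from this repository):

```python
import numpy as np

# Deprecated in numpy 1.20 (removed in 1.24): np.int, np.float, np.bool, np.object.
# These were plain aliases for the Python builtins, so there are two safe fixes:

# 1. The builtin type, when the default width is acceptable:
flags = np.zeros(10, dtype=bool)                   # instead of dtype=np.bool

# 2. An explicit fixed-width type, when the code should pin a specific width:
positions = np.array([1, 2, 3]).astype(np.int64)   # instead of .astype(np.int)
weights = np.array([0.1, 0.2]).astype(np.float64)  # instead of .astype(np.float)

# np.object becomes the builtin object:
ragged = np.array([[1], [2, 3]], dtype=object)     # instead of dtype=np.object
```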
6 changes: 3 additions & 3 deletions compute_ldscores_from_ld.py
@@ -87,8 +87,8 @@ def load_ld_npz(ld_dir, ld_prefix):
 def get_bcor_meta(bcor_obj):
     df_ld_snps = bcor_obj.getMeta()
     df_ld_snps.rename(columns={'rsid':'SNP', 'position':'BP', 'chromosome':'CHR', 'allele1':'A1', 'allele2':'A2'}, inplace=True, errors='raise')
-    df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int)
-    df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int)
+    df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int64)
+    df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int64)
     df_ld_snps = set_snpid_index(df_ld_snps)
     return df_ld_snps

@@ -215,7 +215,7 @@ def compute_ldscores_chr(df_annot_chr, ld_dir=None, use_ukb=False, n=None, ld_fi
 
     #check if the data is binary
     df_annot_chr_raw = df_annot_chr.drop(columns=META_COLUMNS, errors='raise')
-    if np.all(df_annot_chr_raw.dtypes == np.bool):
+    if np.all(df_annot_chr_raw.dtypes == bool):
         is_binary = True
     elif np.all([len(np.unique(df_annot_chr_raw[c]))<=2 for c in df_annot_chr_raw.columns]):
         is_binary = True
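
One subtlety in the replacement choice (an observation, not stated in the commit): as a dtype, the builtin `int` maps to NumPy's platform default integer, which is historically 32-bit on Windows, so `astype(np.int64)` pins the width explicitly rather than reproducing the old platform-dependent behavior of `astype(np.int)`. A small illustration:

```python
import numpy as np

# np.int was a plain alias for Python's int. As a dtype, int maps to the
# platform default integer (np.int_): 64-bit on Linux/macOS, historically
# 32-bit on Windows.
platform_default = np.zeros(3, dtype=int)  # int32 or int64, platform-dependent
pinned = np.zeros(3, dtype=np.int64)       # always 64-bit, as this commit chooses

print(platform_default.dtype, pinned.dtype)
```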
24 changes: 12 additions & 12 deletions finemapper.py
@@ -85,8 +85,8 @@ def load_ld_npz(ld_prefix):
 def get_bcor_meta(bcor_obj):
     df_ld_snps = bcor_obj.getMeta()
     df_ld_snps.rename(columns={'rsid':'SNP', 'position':'BP', 'chromosome':'CHR', 'allele1':'A1', 'allele2':'A2'}, inplace=True, errors='raise')
-    ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int)
-    df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int)
+    ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int64)
+    df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int64)
     return df_ld_snps


@@ -260,7 +260,7 @@ def sync_ld_sumstats(self, ld_arr, df_ld_snps, allow_missing=False):
         df_ld_snps = set_snpid_index(df_ld_snps, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles)
 
         if ld_arr is None:
-            df_ld = pd.DataFrame(np.zeros(len(df_ld_snps.index), dtype=np.int), index=df_ld_snps.index, columns=['dummy'])
+            df_ld = pd.DataFrame(np.zeros(len(df_ld_snps.index), dtype=np.int64), index=df_ld_snps.index, columns=['dummy'])
         else:
             assert ld_arr.shape[0] == df_ld_snps.shape[0]
             assert ld_arr.shape[0] == ld_arr.shape[1]
@@ -352,8 +352,8 @@ def find_cached_ld_file(self, locus_start, locus_end, need_bcor=False):
             df_ld_snps = bcor_obj.getMeta()
             del bcor_obj
             df_ld_snps.rename(columns={'rsid':'SNP', 'position':'BP', 'chromosome':'CHR', 'allele1':'A1', 'allele2':'A2'}, inplace=True, errors='raise')
-            ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int)
-            df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int)
+            ###df_ld_snps['CHR'] = df_ld_snps['CHR'].astype(np.int64)
+            df_ld_snps['BP'] = df_ld_snps['BP'].astype(np.int64)
         else:
             raise IOError('unknown file extension')
         df_ld_snps = set_snpid_index(df_ld_snps, allow_swapped_indel_alleles=self.allow_swapped_indel_alleles)
@@ -495,7 +495,7 @@ def compute_ld_plink(self, locus_start, locus_end, verbose):
         df_bim.rename(columns={'snp':'SNP', 'pos':'BP', 'chrom':'CHR', 'a0':'A2', 'a1':'A1'}, inplace=True)
         df_bim['A1'] = df_bim['A1'].astype('str')
         df_bim['A2'] = df_bim['A2'].astype('str')
-        df_bim['CHR'] = df_bim['CHR'].astype(np.int)
+        df_bim['CHR'] = df_bim['CHR'].astype(np.int64)
         del df_bim['i']
         del df_bim['cm']
         bed = bed.T
@@ -512,10 +512,10 @@ def compute_ld_plink(self, locus_start, locus_end, verbose):
             mem_limit = 1
         else:
             mem_limit = self.memory
-        chunk_size = np.int((np.float(mem_limit) * 0.8) / bed.shape[0] / 4 * (2**30))
+        chunk_size = np.int64((np.float64(mem_limit) * 0.8) / bed.shape[0] / 4 * (2**30))
         if chunk_size==0: chunk_size=1
         if chunk_size > bed.shape[1]: chunk_size = bed.shape[1]
-        num_chunks = np.int(np.ceil(bed.shape[1] / chunk_size))
+        num_chunks = np.int64(np.ceil(bed.shape[1] / chunk_size))
         if num_chunks>1:
             assert chunk_size * (num_chunks-2) < bed.shape[1]-1
         if chunk_size * (num_chunks-1) >= bed.shape[1]:
@@ -893,13 +893,13 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob
         df_susie['DISTANCE_FROM_CENTER'] = np.abs(df_susie['BP'] - middle)
 
         #mark causal sets
-        self.susie_dict = {key:np.array(susie_obj.rx2(key), dtype=np.object) for key in list(susie_obj.names)}
+        self.susie_dict = {key:np.array(susie_obj.rx2(key), dtype=object) for key in list(susie_obj.names)}
         df_susie['CREDIBLE_SET'] = 0
         susie_sets = self.susie_dict['sets'][0]
         #if type(susie_sets) != self.RNULLType:
         try:
             for set_i, susie_set in enumerate(susie_sets):
-                is_in_set = np.zeros(df_susie.shape[0], dtype=np.bool)
+                is_in_set = np.zeros(df_susie.shape[0], dtype=bool)
                 is_in_set[np.array(susie_set)-1] = True
                 is_in_set[df_susie['CREDIBLE_SET']>0] = False
                 df_susie.loc[is_in_set, 'CREDIBLE_SET'] = set_i+1
@@ -979,7 +979,7 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob
             if ld_file is not None:
                 raise ValueError('cannot specify an ld file when assuming a single causal SNP per locus')
             ld_file = finemap_output_prefix+'.ld'
-            np.savetxt(ld_file, np.eye(self.df_sumstats_locus.shape[0], dtype=np.int), fmt='%s')
+            np.savetxt(ld_file, np.eye(self.df_sumstats_locus.shape[0], dtype=np.int64), fmt='%s')
         else:
             if ld_file is None:
                 ld_data = self.get_ld_data(locus_start, locus_end, need_bcor=True, verbose=verbose)
@@ -1016,7 +1016,7 @@ def finemap(self, locus_start, locus_end, num_causal_snps, use_prior_causal_prob
 
         #flip some of the alleles
         if num_causal_snps == 1:
-            is_flipped = np.zeros(self.df_sumstats_locus.shape[0], dtype=np.bool)
+            is_flipped = np.zeros(self.df_sumstats_locus.shape[0], dtype=bool)
         else:
             if ld_file.endswith('.bcor'):
                 bcor_obj = bcor(ld_file)
2 changes: 1 addition & 1 deletion ldsc_polyfun/jackknife.py
@@ -705,7 +705,7 @@ def __init__(self, x, y, n_blocks=None, separators=None, chr_num=None, verbose=T
 
     def _divide_chromosomes_to_sets(self, chr_sizes, num_sets):
         chr_order = np.argsort(chr_sizes)[::-1] #np.arange(len(chr_sizes))
-        chr_assignments = np.zeros(22, dtype=np.int) - 1
+        chr_assignments = np.zeros(22, dtype=np.int64) - 1
         chr_assignments[chr_order[:num_sets]] = np.arange(num_sets)
         set_sizes = chr_sizes[chr_order[:num_sets]].copy()
         for c_i in chr_order[num_sets : len(chr_sizes)]:
2 changes: 1 addition & 1 deletion ldsc_polyfun/sumstats.py
@@ -255,7 +255,7 @@ def _read_ld_sumstats(args, log, fh, alleles=True, dropna=True):
 
     #keep only requested annotations if --anno was specified
    if args.anno is not None:
-        cols_to_keep = np.zeros(len(ref_ld.columns), dtype=np.bool)
+        cols_to_keep = np.zeros(len(ref_ld.columns), dtype=bool)
         annotations = args.anno.split(',')
         is_found1 = np.isin(annotations, ref_ld.columns.str[:-2])
         is_found2 = np.isin(annotations, ref_ld.columns.str[:-4])
8 changes: 4 additions & 4 deletions munge_polyfun_sumstats.py
@@ -21,7 +21,7 @@ def find_df_column(df, strings_to_find, allow_missing=False):
     if isinstance(strings_to_find, str):
         strings_to_find = [strings_to_find]
 
-    is_relevant_col = np.zeros(df.shape[1], dtype=np.bool)
+    is_relevant_col = np.zeros(df.shape[1], dtype=bool)
     for str_to_find in strings_to_find:
         is_relevant_col = is_relevant_col | (df.columns.str.upper() == str_to_find.upper())
     if np.sum(is_relevant_col)==0:
@@ -110,7 +110,7 @@ def compute_z(df_sumstats):
 def filter_sumstats(df_sumstats, min_info_score=None, min_maf=None, remove_strand_ambig=False, keep_hla=False):
 
     logging.info('%d SNPs are in the sumstats file'%(df_sumstats.shape[0]))
-    is_good_snp = np.ones(df_sumstats.shape[0], dtype=np.bool)
+    is_good_snp = np.ones(df_sumstats.shape[0], dtype=bool)
 
     #remove 'bad' BOLT-LMM SNPs
     if 'CHISQ_BOLT_LMM' in df_sumstats.columns:
@@ -142,7 +142,7 @@ def filter_sumstats(df_sumstats, min_info_score=None, min_maf=None, remove_stran
 
     #find strand ambiguous summary statistics
     if remove_strand_ambig:
-        is_strand_ambig = np.zeros(df_sumstats.shape[0], dtype=np.bool)
+        is_strand_ambig = np.zeros(df_sumstats.shape[0], dtype=bool)
         for ambig_pairs in [('A', 'T'), ('T', 'A'), ('C', 'G'), ('G', 'C')]:
             is_strand_ambig = is_strand_ambig | ((df_sumstats['A2']==ambig_pairs[0]) & (df_sumstats['A1']==ambig_pairs[1]))
         is_good_snp = is_good_snp & (~is_strand_ambig)
@@ -171,7 +171,7 @@ def filter_sumstats(df_sumstats, min_info_score=None, min_maf=None, remove_stran
 
 def compute_casecontrol_neff(df_sumstats):
     logging.info('Computing the effective sample size for case-control data...')
-    Neff = (4.0 / (1.0/df_sumstats['N_CASES'] + 1.0/df_sumstats['N_CONTROLS'])).astype(np.int)
+    Neff = (4.0 / (1.0/df_sumstats['N_CASES'] + 1.0/df_sumstats['N_CONTROLS'])).astype(np.int64)
     return Neff
 
 
4 changes: 2 additions & 2 deletions polyfun.py
@@ -405,7 +405,7 @@ def create_df_bins(self, bin_sizes, df_snpvar, df_snpvar_sorted=None, min_bin_si
         ind=0
         df_bins = pd.DataFrame(index=df_snpvar_sorted.index)
         for bin_i, bin_size in enumerate(bin_sizes):
-            snpvar_bin = np.zeros(df_bins.shape[0], dtype=np.bool)
+            snpvar_bin = np.zeros(df_bins.shape[0], dtype=bool)
             snpvar_bin[ind : ind+bin_size] = True
             df_bins['snpvar_bin%d'%(len(bin_sizes) - bin_i)] = snpvar_bin
             ind += bin_size
@@ -462,7 +462,7 @@ def partition_snps_Ckmedian(self, args, use_ridge):
             seg_obj = median_seg_func(df_snpvar_sorted.values, k=np.array([5,30]))
         else:
             seg_obj = median_seg_func(df_snpvar_sorted.values, k=args.num_bins)
-        bin_sizes = np.array(seg_obj.rx2('size')).astype(np.int)
+        bin_sizes = np.array(seg_obj.rx2('size')).astype(np.int64)
         num_bins = len(bin_sizes)
         logging.info('Ckmedian.1d.dp partitioned SNPs into %d bins'%(num_bins))
 
6 changes: 3 additions & 3 deletions polyloc.py
@@ -204,16 +204,16 @@ def compute_Mp(self, p, cumsum_prop_h2, cumnum_binsize):
         num_jk = cumsum_prop_h2.shape[1]
 
         last_bin_index = np.argmax(cumsum_prop_h2 >= p, axis=0)
-        num_snps_bin1 = np.zeros(num_jk, dtype=np.int)
+        num_snps_bin1 = np.zeros(num_jk, dtype=np.int64)
         h2_bin1 = np.zeros(num_jk)
         num_snps_bin1[last_bin_index != 0] = cumnum_binsize[last_bin_index[last_bin_index != 0] - 1]
         h2_bin1[last_bin_index != 0] = cumsum_prop_h2[last_bin_index[last_bin_index != 0] - 1, np.arange(num_jk)[last_bin_index != 0]]
 
         num_snps_bin2 = cumnum_binsize[last_bin_index]
         h2_bin2 = cumsum_prop_h2[last_bin_index, np.arange(num_jk)]
-        slope = (num_snps_bin2-num_snps_bin1).astype(np.float) / (h2_bin2-h2_bin1)
+        slope = (num_snps_bin2-num_snps_bin1).astype(np.float64) / (h2_bin2-h2_bin1)
         assert not np.any(np.isnan(slope))
-        Mp = np.ceil(num_snps_bin1 + slope * (p - h2_bin1)).astype(np.int)
+        Mp = np.ceil(num_snps_bin1 + slope * (p - h2_bin1)).astype(np.int64)
 
         return Mp
 
6 changes: 3 additions & 3 deletions polypred.py
@@ -47,7 +47,7 @@ def create_plink_range_file(df_betas, temp_dir, num_jk=200):
     scores_file = os.path.join(temp_dir, 'snp_scores.txt')
     separators = np.floor(np.linspace(0, df_betas.shape[0], num_jk+1)).astype(int)
     df_betas['score'] = 0
-    is_in_range = np.zeros(df_betas.shape[0], dtype=np.bool)
+    is_in_range = np.zeros(df_betas.shape[0], dtype=bool)
     for i in range(len(separators)-1):
         is_in_range[separators[i] : separators[i+1]] = True
         df_betas.loc[is_in_range, 'score'] = i+1.5
@@ -176,7 +176,7 @@ def load_betas_files(betas_file, verbose=True):
     df_betas.rename(columns={'sid':'SNP', 'nt1':'A1', 'nt2':'A2', 'BETA_MEAN':'BETA', 'ldpred_inf_beta':'BETA', 'chrom':'CHR', 'Chrom':'CHR', 'pos':'BP'}, inplace=True, errors='ignore')
     if not is_numeric_dtype(df_betas['CHR']):
         if df_betas['CHR'].str.startswith('chrom_').all():
-            df_betas['CHR'] = df_betas['CHR'].str[6:].astype(np.int)
+            df_betas['CHR'] = df_betas['CHR'].str[6:].astype(np.int64)
         else:
             raise ValueError('unknown CHR format')
     df_betas.rename(columns={'BETA_joint':'BETA', 'ALLELE1':'A1', 'ALLELE0':'A2', 'beta_mean':'BETA', 'MAF_BOLT':'A1Frq', 'Name':'SNP', 'A1Effect':'BETA', 'Name':'SNP', 'Chrom':'CHR', 'Position':'BP', 'beta':'BETA'}, inplace=True, errors='ignore')
@@ -284,7 +284,7 @@ def estimate_mixing_weights(args):
         float(df_pheno['PHENO'].iloc[0])
     except:
         df_pheno = df_pheno.iloc[1:]
-    df_pheno['PHENO'] = df_pheno['PHENO'].astype(np.float)
+    df_pheno['PHENO'] = df_pheno['PHENO'].astype(np.float64)
     if np.any(df_pheno.index.duplicated()):
         raise ValueError('duplicate ids found in %s'%(args.pheno))
 
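
A quick way to confirm that no deprecated aliases survive a sweep like this one (a sketch, not part of the commit; the alias list and recursive glob are assumptions):

```python
import glob
import re

# Match the numpy 1.20 deprecated aliases; the trailing \b avoids false
# positives on explicit dtypes such as np.int64 or np.float32.
pattern = re.compile(r'\bnp\.(int|float|bool|object|str|complex)\b')

for path in glob.glob('**/*.py', recursive=True):
    with open(path) as f:
        for lineno, line in enumerate(f, start=1):
            if pattern.search(line):
                print(f'{path}:{lineno}: {line.rstrip()}')
```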
