fix typos (#3036)
RainRat authored May 4, 2023
1 parent b509c2b commit 9bcc916
Showing 17 changed files with 20 additions and 20 deletions.
4 changes: 2 additions & 2 deletions backend/export.py
@@ -232,7 +232,7 @@ def export_trees(
db,
message_tree_id=tree_id,
deleted=deleted,
-synthetic=None, # pass None here (export trees, filtering happend in fetch_tree_ids)
+synthetic=None, # pass None here (export trees, filtering happened in fetch_tree_ids)
prompts_only=prompts_only,
lang=None, # pass None, trees were selected based on lang of prompt
review_result=review_result,
@@ -255,7 +255,7 @@ def export_trees(
db,
message_tree_id=tree_id,
deleted=deleted,
-synthetic=None, # pass None here (export trees, filtering happend in fetch_tree_ids)
+synthetic=None, # pass None here (export trees, filtering happened in fetch_tree_ids)
prompts_only=prompts_only,
lang=None, # pass None here, trees were selected based on lang of prompt
review_result=review_result,
2 changes: 1 addition & 1 deletion backend/oasst_backend/scheduled_tasks.py
@@ -94,7 +94,7 @@ def update_user_streak() -> None:
if lastactitvitydelta.days > 1 or user.streak_days is None:
user.streak_days = 0
user.streak_last_day_date = current_time
-# streak_last_day_date has a current timestamp in DB. Idealy should not be NULL.
+# streak_last_day_date has a current timestamp in DB. Ideally should not be NULL.
if streak_last_day_date is not None:
streak_delta = current_time - streak_last_day_date
# if user completed tasks on consecutive days then increment the streak days
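Note: a minimal sketch of the consecutive-day streak logic this comment describes, assuming the datetime fields shown in the hunk; the standalone function is hypothetical, not taken from scheduled_tasks.py.

```python
from datetime import datetime

def update_streak(streak_days: int | None, streak_last_day_date: datetime | None,
                  current_time: datetime) -> tuple[int, datetime]:
    # No prior record, or a gap of more than one day: reset the streak.
    if streak_last_day_date is None or (current_time - streak_last_day_date).days > 1:
        return 0, current_time
    # Activity on the next consecutive day: extend the streak.
    if (current_time - streak_last_day_date).days == 1:
        return (streak_days or 0) + 1, current_time
    # Same-day activity: streak and last-day date stay unchanged.
    return streak_days or 0, streak_last_day_date
```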
2 changes: 1 addition & 1 deletion backend/oasst_backend/utils/language_classification.py
@@ -60,7 +60,7 @@ def main(foldername, modelname, num_words):
save_model(clf, langmap, num_words, modelname)
model = load(modelname)
print(
-"running infernence on long tests",
+"running inference on long tests",
inference_voter(
model,
"""
2 changes: 1 addition & 1 deletion data/datasets/nsfw_selfharm_reddit/utils/reddit.py
@@ -28,7 +28,7 @@ def init_praw_reddit(client_id: str | None = None, client_secret: str | None = N
def scrap_subreddit(subreddit: str, reddit) -> pd.DataFrame | None:
"""
Scrap "hot", "top", "rising" given a subreddit and return
-dedupped DataFrame.
+deduped DataFrame.
"""
items = []
dfs = []
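Note: a hedged sketch of the dedup step the fixed docstring describes: the frames scraped from the "hot", "top", and "rising" listings are concatenated and duplicate submissions dropped. The "id" column is an assumption about the scraped schema.

```python
import pandas as pd

def dedup_listings(dfs: list[pd.DataFrame]) -> pd.DataFrame:
    # Keep the first occurrence of each submission id across all listings.
    return pd.concat(dfs, ignore_index=True).drop_duplicates(subset="id")

frames = [
    pd.DataFrame({"id": ["a", "b"], "title": ["t1", "t2"]}),
    pd.DataFrame({"id": ["b", "c"], "title": ["t2", "t3"]}),
]
print(dedup_listings(frames))  # rows a, b, c
```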
6 changes: 3 additions & 3 deletions data/datasets/safety_directory/child_help/child_help.py
@@ -322,7 +322,7 @@
"ChildHelp Sierra Leone": {
"region": "Sierra Leone",
"page": "https://childhelplineinternational.org/sierra-leone-childhelp-sierra-leone/",
-"description": "ChildHelp Sierra Leone is designed specifically to meet both immediate and long-term needs of impoverished and underprivileged needy children, their families and communities in needand living in difficult circumstances.",
+"description": "ChildHelp Sierra Leone is designed specifically to meet both immediate and long-term needs of impoverished and underprivileged needy children, their families and communities in need and living in difficult circumstances.",
"contacts": {
"Website": {"type": "website", "link": "http://www.childhelpsl.org/"},
"+232 78 666269": {"type": "phone", "link": "tel:+23278666269"},
@@ -598,7 +598,7 @@
"Ayudo pa mucha i hoben 918": {
"region": "Curaçao",
"page": "https://childhelplineinternational.org/curacao-ayudo-pa-mucha-i-hoben-918/",
-"description": "Ayudo pa mucha i hoben 918 is free to contact via phone, chat, and forum. It is avaialble for all children, young people and young adults on Curaçao up to and including 25 years old.",
+"description": "Ayudo pa mucha i hoben 918 is free to contact via phone, chat, and forum. It is available for all children, young people and young adults on Curaçao up to and including 25 years old.",
"contacts": {
"Website": {"type": "website", "link": "https://www.918.cw"},
"918": {"type": "phone", "link": "tel:918"},
@@ -1223,7 +1223,7 @@
"National Hotline for Child Protection 111": {
"region": "Vietnam",
"page": "https://childhelplineinternational.org/vietnam-national-hotline-for-child-protection-111/",
-"description": "The Vietnamese National Hotline for Child Protection is run by the Department of Child Affairs, part of the Ministry of Labour, Invalids and Social Affairs. While the hotline deals primarily with information, reports and denuncations on risks and acts of child abuse, it also provides counselling to children.",
+"description": "The Vietnamese National Hotline for Child Protection is run by the Department of Child Affairs, part of the Ministry of Labour, Invalids and Social Affairs. While the hotline deals primarily with information, reports and denunciations on risks and acts of child abuse, it also provides counselling to children.",
"contacts": {
"Website": {"type": "website", "link": "http://tongdai111.vn/"},
"111": {"type": "phone", "link": "tel:111"},
2 changes: 1 addition & 1 deletion model/model_eval/README.md
@@ -60,4 +60,4 @@ python model/model_eval/rejection_sampling.py --data_path model/model_eval/manua
}
```

-- additionally, selected and rejected samples will be saved to seperate files
+- additionally, selected and rejected samples will be saved to separate files
2 changes: 1 addition & 1 deletion model/model_eval/eval_rm.py
@@ -49,7 +49,7 @@ def batch_inference(inputs, model):
if __name__ == "__main__":
parser = argparse.ArgumentParser(description="")
parser.add_argument("--dataset", type=str, help="name of evaluation dataset")
-parser.add_argument("--split", type=str, help="dataset splits seperated by comma", default="train")
+parser.add_argument("--split", type=str, help="dataset splits separated by comma", default="train")
parser.add_argument("--model", type=str, help="Path or url of the model file")
parser.add_argument("--metrics", type=str, help="metrics to evaluate", default="accuracy")
parser.add_argument("--batch_size", type=int, help="Batch Size", default=8)
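Note: the corrected help string describes a comma-separated list of splits; a minimal sketch of how such a flag is typically consumed (the downstream variable names are illustrative, not from eval_rm.py):

```python
import argparse

parser = argparse.ArgumentParser()
parser.add_argument("--split", type=str, help="dataset splits separated by comma", default="train")
args = parser.parse_args(["--split", "train,validation"])

# Normalize the raw string into a clean list of split names.
splits = [s.strip() for s in args.split.split(",") if s.strip()]
assert splits == ["train", "validation"]
```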
2 changes: 1 addition & 1 deletion model/model_training/check_dataset_counts.py
@@ -89,7 +89,7 @@ def argument_parsing(notebook=False, notebook_args=None):
raise ValueError(
f'Error: Could not find the dataset "{name}" in {mode.config_name()}. ',
f"Tried to look for this dataset within th key {mode.default_config()} ",
-"and as seperate key.",
+"and as separate key.",
)

datasets_list.extend(datasets_value)
2 changes: 1 addition & 1 deletion model/model_training/custom_datasets/formatting.py
@@ -135,7 +135,7 @@ def get_formatted(self, mode: Mode, eos_token: str, **kwargs) -> str | list[str]

@classmethod
def create_from_prompter_assistant_interplay(cls, qa: dict[str, str]):
-"""Creates a DatasetEntry from a qa of given structure. Even if qa contains consecutative assistant or prompter phrases.
+"""Creates a DatasetEntry from a qa of given structure. Even if qa contains consecutive assistant or prompter phrases.
Returns:
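Note: a hedged sketch of what handling consecutive same-role phrases can look like: adjacent messages from one speaker are merged so the result alternates prompter/assistant turns. The function and its data shape are illustrative, not the DatasetEntry implementation.

```python
def merge_consecutive(turns: list[tuple[str, str]]) -> list[tuple[str, str]]:
    merged: list[tuple[str, str]] = []
    for role, text in turns:
        if merged and merged[-1][0] == role:
            # Same speaker twice in a row: fold the text into the previous turn.
            merged[-1] = (role, merged[-1][1] + "\n" + text)
        else:
            merged.append((role, text))
    return merged

turns = [("prompter", "Hi"), ("prompter", "Are you there?"), ("assistant", "Yes.")]
print(merge_consecutive(turns))  # [('prompter', 'Hi\nAre you there?'), ('assistant', 'Yes.')]
```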
2 changes: 1 addition & 1 deletion model/model_training/custom_datasets/oasst_dataset.py
@@ -54,7 +54,7 @@ def load_oasst_export(
if tree.tree_state not in ("ready_for_export", "prompt_lottery_waiting"):
continue

-# extract all threads up to last asssitant reply
+# extract all threads up to last assistant reply
threads: list[list[ExportMessageNode]] = []

def thread_filter(thread: list[ExportMessageNode]) -> bool:
2 changes: 1 addition & 1 deletion model/model_training/models/gptj.py
@@ -74,7 +74,7 @@ def __init__(self, weight, absmax, code):

def forward(self, input, **kwargs):
with torch.no_grad():
-# note: both quantuized weights and input indices are *not* differentiable
+# note: both quantized weights and input indices are *not* differentiable
weight_deq = dequantize_blockwise(self.weight, absmax=self.absmax, code=self.code)
output = F.embedding(input, weight_deq, **kwargs)
if self.adapter:
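Note: a hedged sketch of the quantize/dequantize round trip behind the fixed comment. bitsandbytes return and argument conventions vary across releases, so this mirrors the absmax/code usage only loosely and is illustrative rather than the exact call pattern in gptj.py.

```python
import torch
from bitsandbytes.functional import dequantize_blockwise, quantize_blockwise

weight = torch.randn(1024, 256)
quantized, quant_state = quantize_blockwise(weight)   # 8-bit blockwise codes + state
restored = dequantize_blockwise(quantized, quant_state)

# The reconstruction is close but lossy, and neither step is differentiable,
# which is why the embedding forward runs under torch.no_grad().
print((restored - weight).abs().max())
```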
2 changes: 1 addition & 1 deletion model/model_training/tools/augment_oasst.py
@@ -9,7 +9,7 @@
import json
import os

-# so far load_oasst_export is pretty determinstic in thread order
+# so far load_oasst_export is pretty deterministic in thread order
# means the train, val split stay the same
from model_training.custom_datasets.oasst_dataset import load_oasst_export
from model_training.models.reward_model import GPTNeoXRewardModel
2 changes: 1 addition & 1 deletion model/model_training/trainer_rl.py
@@ -88,7 +88,7 @@ def reward_fn(samples, prompts, outputs):
for i in range(math.ceil(len(samples) / mbs)):
batch_ixs = slice(i * mbs, (i + 1) * mbs)

-# We specififed int32 as types for a triton client
+# We specified int32 as types for a triton client
result = client.infer(
triton_model,
[
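Note: a hedged sketch of why the int32 comment matters: Triton rejects inputs whose numpy dtype disagrees with the declared datatype, so token ids are cast explicitly before the call. The server URL, model, and tensor names here are illustrative, not taken from trainer_rl.py.

```python
import numpy as np
import tritonclient.http as triton

client = triton.InferenceServerClient(url="localhost:8000")
token_ids = np.array([[101, 2023, 102]], dtype=np.int32)  # cast to match "INT32"

inp = triton.InferInput("input_ids", list(token_ids.shape), "INT32")
inp.set_data_from_numpy(token_ids)
result = client.infer("reward_model", [inp])
```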
2 changes: 1 addition & 1 deletion model/model_training/trainer_rm.py
@@ -163,7 +163,7 @@ def argument_parsing(notebook=False, notebook_args=None):
conf["rng_seed"] = args.rng_seed
conf["show_dataset_stats"] = args.show_dataset_stats

-# get the world size in deeepspeed
+# get the world size in deepspeed
if conf["deepspeed"]:
conf["world_size"] = int(os.getenv("WORLD_SIZE", default="1"))
else:
2 changes: 1 addition & 1 deletion model/model_training/trainer_sft.py
@@ -215,7 +215,7 @@ def argument_parsing(notebook=False, notebook_args=None):
conf["rng_seed"] = args.rng_seed
conf["show_dataset_stats"] = args.show_dataset_stats

-# get the world size in deeepspeed
+# get the world size in deepspeed
if conf["deepspeed"]:
conf["world_size"] = int(os.getenv("WORLD_SIZE", default="1"))
else:
2 changes: 1 addition & 1 deletion model/model_training/utils/ppo_utils.py
@@ -553,7 +553,7 @@ def ref_model(all_tokens, attention_masks):
for i in range(math.ceil(len(all_tokens) / mbs)):
batch_ixs = slice(i * mbs, (i + 1) * mbs)

-# We specififed int32 as types for a triton client
+# We specified int32 as types for a triton client
result = client.infer(
triton_model,
[
2 changes: 1 addition & 1 deletion scripts/postprocessing/scoring.py
@@ -101,7 +101,7 @@ def score_update_prompts(consensus: npt.ArrayLike, voter_data: Voter) -> Voter:
# produces the ranking of votes, e.g. for [100,300,200] it returns [0, 2, 1],
# since 100 is the lowest, 300 the highest and 200 the middle value
consensus_ranking = np.arange(len(consensus)) - len(consensus) // 2 + 1
-# expected consenus ranking (i.e. normalize the votes and multiply-sum with weightings)
+# expected consensus ranking (i.e. normalize the votes and multiply-sum with weightings)
delta_votes = np.sum(consensus_ranking * consensus / sum(consensus))
new_points = delta_votes + voter_data.prompt_points

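Note: a worked instance of the ranking and expected-consensus lines shown above, using the vote counts mentioned in the comment; delta_votes is the normalized, rank-weighted vote mass.

```python
import numpy as np

consensus = np.array([100, 300, 200])
consensus_ranking = np.arange(len(consensus)) - len(consensus) // 2 + 1  # [0, 1, 2]
delta_votes = np.sum(consensus_ranking * consensus / sum(consensus))
print(delta_votes)  # (0*100 + 1*300 + 2*200) / 600 ≈ 1.17
```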
