Skip to content

Wikilingua Cross Lingual #809

Open
Muennighoff wants to merge 4 commits into
bigscience-workshop:eval-hackathon from
Muennighoff:wikilingua
Open

Wikilingua Cross Lingual #809
Muennighoff wants to merge 4 commits into
bigscience-workshop:eval-hackathon from
Muennighoff:wikilingua

Conversation

@Muennighoff
Copy link
Copy Markdown

This is a bit hacky; maybe you don't want it.

from promptsource.templates import DatasetTemplates
from functools import partial
import multiprocessing
import os
import datasets
from datasets import load_dataset
from functools import partial

# Dataset identifier passed to `load_dataset`.
ds_name = "GEM/wiki_lingua"
# Config name in "<source>_<target>" form; it is split on "_" further down
# to obtain the two language codes used for row filtering.
subset_name = "fr_zh"


def add_code(example, source_language_name="French", target_language_name="Chinese"):
    """Attach human-readable source/target language names to a dataset row.

    The row is updated in place and also returned, so the function can be
    handed directly to a row-wise ``map`` call.

    Args:
        example: Mutable mapping representing one row.
        source_language_name: Value stored under ``"source_language_name"``.
            Defaults to ``"French"``, matching the ``"fr_zh"`` config used in
            this script.
        target_language_name: Value stored under ``"target_language_name"``.
            Defaults to ``"Chinese"`` for the same reason.

    Returns:
        The same ``example`` object with the two name fields set.
    """
    # Names are parameters rather than literals so that other language pairs
    # can reuse this function, e.g. via ``functools.partial``.
    example["source_language_name"] = source_language_name
    example["target_language_name"] = target_language_name
    return example

def filter_a_b(example, lang_a, lang_b):
    """Tell whether a row goes from language ``lang_a`` to language ``lang_b``.

    Args:
        example: Mapping with ``"source_language"`` and ``"target_language"``
            entries.
        lang_a: Required value of ``example["source_language"]``.
        lang_b: Required value of ``example["target_language"]``.

    Returns:
        True only when both fields match; False otherwise.
    """
    # Reject on the source side first; the target field is only read when
    # the source already matches.
    if not example["source_language"] == lang_a:
        return False
    return example["target_language"] == lang_b

# The config name has the form "<source>_<target>"; split it once and reuse
# the two pieces as the language codes to keep.
pair_codes = subset_name.split("_")
keep_pair = partial(filter_a_b, lang_a=pair_codes[0], lang_b=pair_codes[1])

# Load the training split, add the language-name fields to every row, then
# keep only the rows whose source/target codes match the config name.
ds = (
    load_dataset(ds_name, subset_name, split="train")
    .map(add_code)
    .filter(keep_pair)
)

# NOTE(review): templates are looked up under "<ds_name>/en_en" rather than
# under the loaded pair -- presumably this is the "hacky" part mentioned in
# the PR description; confirm that the path form is intended.
prompts = DatasetTemplates(f"{ds_name}/en_en")

# Render every available template on the row at index 1 and print the result.
for template_name in prompts.all_template_names:
    template = prompts[template_name]
    rendered = template.apply(ds[1])
    print(rendered)
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment

Labels

None yet

1 participant