https://huggingface.co/Bruno/Harpia-7b-guanacoLora/blob/main/har.png
This adapter was created with the PEFT library by fine-tuning the base model Falcon-7b on the timdettmers/openassistant-guanaco dataset using the QLoRA method.
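The card does not ship a training script, so the snippet below is only a minimal sketch of how an adapter like this could be produced with QLoRA, assuming the common PEFT + bitsandbytes + TRL recipe and the `tiiuae/falcon-7b` base checkpoint. The LoRA hyperparameters (`r`, `lora_alpha`, target modules) and training arguments are illustrative, not the values actually used for this adapter (those live in the repo's `adapter_config.json`).

```python
import torch
from datasets import load_dataset
from peft import LoraConfig, prepare_model_for_kbit_training
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    TrainingArguments,
)
from trl import SFTTrainer

# Load the base model in 4-bit NF4, matching the inference example below
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)
base_model = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",  # assumed base checkpoint
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map={"": 0},
)
base_model = prepare_model_for_kbit_training(base_model)

tokenizer = AutoTokenizer.from_pretrained("tiiuae/falcon-7b", trust_remote_code=True)
tokenizer.pad_token = tokenizer.eos_token

# Illustrative LoRA config; not necessarily the adapter's actual settings
lora_config = LoraConfig(
    r=16,
    lora_alpha=32,
    lora_dropout=0.05,
    bias="none",
    task_type="CAUSAL_LM",
    target_modules=["query_key_value"],  # Falcon fused attention projection
)

dataset = load_dataset("timdettmers/openassistant-guanaco", split="train")

trainer = SFTTrainer(
    model=base_model,
    train_dataset=dataset,
    peft_config=lora_config,
    dataset_text_field="text",
    max_seq_length=512,
    tokenizer=tokenizer,
    args=TrainingArguments(
        output_dir="./harpia-7b-qlora",
        per_device_train_batch_size=4,
        gradient_accumulation_steps=4,
        learning_rate=2e-4,
        num_train_epochs=1,
        fp16=True,
        logging_steps=10,
    ),
)
trainer.train()
trainer.model.save_pretrained("./harpia-7b-qlora")
```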
TBA
TBA
Usage example: load the 4-bit quantized Falcon-7b base model, attach the adapter, and generate a response.

```python
import torch
from peft import PeftModel, PeftConfig
from transformers import (
    AutoModelForCausalLM,
    AutoTokenizer,
    BitsAndBytesConfig,
    GenerationConfig,
)

peft_model_id = "Bruno/Harpia-7b-guanacoLora"
config = PeftConfig.from_pretrained(peft_model_id)

# 4-bit NF4 quantization for the Falcon-7b base model
bnb_config = BitsAndBytesConfig(
    load_in_4bit=True,
    bnb_4bit_quant_type="nf4",
    bnb_4bit_compute_dtype=torch.float16,
)

tokenizer = AutoTokenizer.from_pretrained(peft_model_id)
model = AutoModelForCausalLM.from_pretrained(
    config.base_model_name_or_path,
    return_dict=True,
    quantization_config=bnb_config,
    trust_remote_code=True,
    device_map={"": 0},
)
# Attach the LoRA adapter to the quantized base model
model = PeftModel.from_pretrained(model, peft_model_id)

# The prompt templates were empty in the original card; these Alpaca/Guanaco-style
# Spanish templates are assumptions so the example runs end to end.
prompt_input = (
    "### Instrucción:\n{instruction}\n\n### Entrada:\n{input}\n\n### Respuesta:\n"
)
prompt_no_input = "### Instrucción:\n{instruction}\n\n### Respuesta:\n"


def create_prompt(instruction, input=None):
    if input:
        return prompt_input.format(instruction=instruction, input=input)
    return prompt_no_input.format(instruction=instruction)


def generate(
    instruction,
    input=None,
    max_new_tokens=128,
    temperature=0.1,
    top_p=0.75,
    top_k=40,
    num_beams=4,
    **kwargs,
):
    prompt = create_prompt(instruction, input)
    inputs = tokenizer(prompt, return_tensors="pt")
    input_ids = inputs["input_ids"].to("cuda")
    attention_mask = inputs["attention_mask"].to("cuda")
    generation_config = GenerationConfig(
        temperature=temperature,
        top_p=top_p,
        top_k=top_k,
        num_beams=num_beams,
        **kwargs,
    )
    with torch.no_grad():
        generation_output = model.generate(
            input_ids=input_ids,
            attention_mask=attention_mask,
            generation_config=generation_config,
            return_dict_in_generate=True,
            output_scores=True,
            max_new_tokens=max_new_tokens,
        )
    s = generation_output.sequences[0]
    output = tokenizer.decode(s)
    # Return only the text produced after the response marker
    return output.split("### Respuesta:")[1]


instruction = "Me conte algumas curiosidades sobre o Brasil"
print("Instruções:", instruction)
print("Resposta:", generate(instruction))
```
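For deployment without the PEFT wrapper, the adapter weights can be folded into the base model. The sketch below is an assumption-based example: it reloads the base in fp16 (rather than merging into the 4-bit quantized copy) and uses PEFT's `merge_and_unload()`; the output directory name is hypothetical.

```python
import torch
from peft import PeftModel
from transformers import AutoModelForCausalLM, AutoTokenizer

# Reload the base model in fp16; merging into a 4-bit quantized model is not attempted here
base = AutoModelForCausalLM.from_pretrained(
    "tiiuae/falcon-7b",  # assumed base checkpoint
    torch_dtype=torch.float16,
    trust_remote_code=True,
    device_map="auto",
)
merged = PeftModel.from_pretrained(base, "Bruno/Harpia-7b-guanacoLora")
merged = merged.merge_and_unload()  # fold LoRA deltas into the base weights

merged.save_pretrained("./harpia-7b-merged")
AutoTokenizer.from_pretrained("Bruno/Harpia-7b-guanacoLora").save_pretrained("./harpia-7b-merged")
```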