Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
38 changes: 38 additions & 0 deletions modules/loaders.py
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,11 @@
'trust_remote_code',
'no_use_fast',
'no_flash_attn',
],
'HQQ': [
'trust_remote_code',
'no_use_fast',
'no_flash_attn',
]
})

Expand Down Expand Up @@ -495,6 +500,39 @@
'skip_special_tokens',
'auto_max_new_tokens',
},
'HQQ': {
'temperature',
'temperature_last',
'top_p',
'min_p',
'top_k',
'typical_p',
'epsilon_cutoff',
'eta_cutoff',
'tfs',
'top_a',
'repetition_penalty',
'presence_penalty',
'frequency_penalty',
'repetition_penalty_range',
'encoder_repetition_penalty',
'no_repeat_ngram_size',
'min_length',
'seed',
'do_sample',
'mirostat_mode',
'mirostat_tau',
'mirostat_eta',
'grammar_file_row',
'grammar_string',
'guidance_scale',
'negative_prompt',
'ban_eos_token',
'custom_token_bans',
'add_bos_token',
'skip_special_tokens',
'auto_max_new_tokens',
},
}

loaders_model_types = {
Expand Down
10 changes: 10 additions & 0 deletions modules/models.py
Original file line number Diff line number Diff line change
Expand Up @@ -72,6 +72,7 @@ def load_model(model_name, loader=None):
'ctransformers': ctransformers_loader,
'AutoAWQ': AutoAWQ_loader,
'QuIP#': QuipSharp_loader,
'HQQ': HQQ_loader,
}

metadata = get_model_metadata(model_name)
Expand Down Expand Up @@ -404,6 +405,15 @@ def ExLlamav2_HF_loader(model_name):

return Exllamav2HF.from_pretrained(model_name)

def HQQ_loader(model_name):
    """Load a pre-quantized HQQ model from the shared models directory.

    Parameters:
        model_name: Folder name of the quantized model under
            `shared.args.model_dir`.

    Returns:
        The loaded HQQ causal-LM model, with the HQQLinear layers
        configured to use the PYTORCH_COMPILE backend for inference.
    """
    # Local imports so the optional `hqq` dependency is only required
    # when this loader is actually selected.
    from hqq.core.quantize import HQQBackend, HQQLinear
    from hqq.engine.hf import HQQModelForCausalLM

    model_dir = f'{shared.args.model_dir}/{model_name}'
    # Routine load message — info level, not warning (matches the other loaders).
    logger.info(f"Loading HQQ model from {model_dir} with HQQLinear backend")
    model = HQQModelForCausalLM.from_quantized(model_dir)
    # Backend selection is global for HQQLinear and affects the forward pass;
    # set it after loading so all quantized layers use the compiled kernel.
    HQQLinear.set_backend(HQQBackend.PYTORCH_COMPILE)
    return model

def RWKV_loader(model_name):
'''
Expand Down
2 changes: 2 additions & 0 deletions modules/shared.py
Original file line number Diff line number Diff line change
Expand Up @@ -243,6 +243,8 @@ def fix_loader_name(name):
return 'AutoAWQ'
elif name in ['quip#', 'quip-sharp', 'quipsharp', 'quip_sharp']:
return 'QuIP#'
elif name in ['HQQ']:
return 'HQQ'


def add_extension(name, last=False):
Expand Down
1 change: 1 addition & 0 deletions requirements.txt
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@ tqdm
wandb

git+https://github.com/oobabooga/torch-grammar.git
git+https://github.com/mobiusml/hqq.git

# bitsandbytes
bitsandbytes==0.41.1; platform_system != "Windows"
Expand Down