Skip to content
Draft
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Prev Previous commit
Next Next commit
update text conversion
  • Loading branch information
ngxson committed Jun 19, 2025
commit d8589f8bb45bbcaef049dc780b8721f8113ebc7c
7 changes: 5 additions & 2 deletions convert_hf_to_gguf.py
Original file line number | Diff line number | Diff line change
Expand Up @@ -4262,7 +4262,7 @@ def set_gguf_parameters(self):
def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iterable[tuple[str, Tensor]]:
del bid # unused

- if name.startswith("language_model."):
+ if "language_model." in name:
name = name.replace("language_model.", "")

elif name.startswith("multi_modal_projector.") or name.startswith("vision_tower.") \
Expand Down Expand Up @@ -4336,7 +4336,7 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter
return [] # skip other tensors


- @ModelBase.register("Gemma3p5ForCausalLM")
+ @ModelBase.register("Gemma3nForConditionalGeneration")
class Gemma3NModel(Gemma3Model):
model_arch = gguf.MODEL_ARCH.GEMMA3N
norm_shift = 0.0 # same value as Gemma3p5RMSNorm's scale_shift in the Python code
Expand Down Expand Up @@ -4374,6 +4374,9 @@ def modify_tensors(self, data_torch: Tensor, name: str, bid: int | None) -> Iter

# TODO: implement self.prediction_coefs.weight.clamp_(...)

+ if "language_model." not in name:
+ return [] # skip non-language model tensors

if "embed_tokens_per_layer.weight" in name:
hidden_size_per_layer_input = 256
data_torch = data_torch * (hidden_size_per_layer_input**0.5)
Expand Down