Quantco · MatthiasSchmidtblaicherQC · Jan 29, 2024 · Jan 25, 2024 · Jan 25, 2024 · Jan 29, 2024
diff --git a/src/glum/_glm.py b/src/glum/_glm.py
@@ -2717,7 +2717,9 @@ def _set_up_and_check_fit_args(
 
                 if any(X.dtypes == "category"):
 
-                    def _expand_categorical_penalties(penalty, X, drop_first):
+                    def _expand_categorical_penalties(
+                        penalty, X, drop_first, has_missing_category
+                    ):
                         """
                         If P1 or P2 has the same shape as X before expanding the
                         categoricals, we assume that the penalty at the location of
@@ -2741,19 +2743,29 @@ def _expand_categorical_penalties(penalty, X, drop_first):
                                     chain.from_iterable(
                                         [
                                             elmt
-                                            for _ in dtype.categories[int(drop_first) :]
+                                            for _ in range(
+                                                len(dtype.categories)
+                                                + has_missing_category[col]
+                                                - drop_first
+                                            )
                                         ]
                                         if pd.api.types.is_categorical_dtype(dtype)
                                         else [elmt]
-                                        for elmt, dtype in zip(penalty, X.dtypes)
+                                        for elmt, (col, dtype) in zip(
+                                            penalty, X.dtypes.items()
+                                        )
                                     )
                                 )
                             )
                         else:
                             return penalty
 
-                    P1 = _expand_categorical_penalties(self.P1, X, self.drop_first)
-                    P2 = _expand_categorical_penalties(self.P2, X, self.drop_first)
+                    P1 = _expand_categorical_penalties(
+                        self.P1, X, self.drop_first, self.has_missing_category_
+                    )
+                    P2 = _expand_categorical_penalties(
+                        self.P2, X, self.drop_first, self.has_missing_category_
+                    )
 
                 X = tm.from_pandas(
                     X,

diff --git a/tests/glm/test_glm.py b/tests/glm/test_glm.py
@@ -53,7 +53,7 @@
 
 
 def get_small_x_y(
-    estimator: Union[GeneralizedLinearRegressor, GeneralizedLinearRegressorCV]
+    estimator: Union[GeneralizedLinearRegressor, GeneralizedLinearRegressorCV],
 ) -> tuple[np.ndarray, np.ndarray]:
     if isinstance(estimator, GeneralizedLinearRegressor):
         n_rows = 1
@@ -362,6 +362,43 @@ def test_P1_P2_expansion_with_categoricals():
     np.testing.assert_allclose(mdl1.coef_, mdl2.coef_)
 
 
+def test_P1_P2_expansion_with_categoricals_missings():
+    rng = np.random.default_rng(42)
+    X = pd.DataFrame(
+        data={
+            "dense": np.linspace(0, 10, 60),
+            "cat": pd.Categorical(rng.integers(5, size=60)).remove_categories(0),
+        }
+    )
+    y = rng.normal(size=60)
+
+    mdl1 = GeneralizedLinearRegressor(
+        l1_ratio=0.01,
+        P1=[1, 2, 2, 2, 2, 2],
+        P2=[2, 1, 1, 1, 1, 1],
+        cat_missing_method="convert",
+    )
+    mdl1.fit(X, y)
+
+    mdl2 = GeneralizedLinearRegressor(
+        l1_ratio=0.01,
+        P1=[1, 2],
+        P2=[2, 1],
+        cat_missing_method="convert",
+    )
+    mdl2.fit(X, y)
+    np.testing.assert_allclose(mdl1.coef_, mdl2.coef_)
+
+    mdl3 = GeneralizedLinearRegressor(
+        l1_ratio=0.01,
+        P1=[1, 2],
+        P2=sparse.diags([2, 1, 1, 1, 1, 1]),
+        cat_missing_method="convert",
+    )
+    mdl3.fit(X, y)
+    np.testing.assert_allclose(mdl1.coef_, mdl3.coef_)
+
+
 @pytest.mark.parametrize(
     "estimator", [GeneralizedLinearRegressor, GeneralizedLinearRegressorCV]
 )