limit all docstrings / comments to 88 chars
wochinge committed Mar 9, 2021
1 parent 6929f65 commit 1369688
Showing 50 changed files with 286 additions and 182 deletions.
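
The changes below mostly re-wrap long docstrings and comments to an 88-character limit; where a line cannot be wrapped (for example a long URL), the commit appends # noqa: W505 instead, the pycodestyle code for over-long documentation lines. As a rough, hypothetical illustration of what such a limit amounts to (the project itself presumably enforces it through flake8/pycodestyle rather than a script like this, and overlong_doc_lines is an invented helper):

import io
import tokenize

MAX_DOC_LENGTH = 88

def overlong_doc_lines(source: str):
    """Yield (line_number, line) for comment/string lines longer than the limit."""
    lines = source.splitlines()
    for tok in tokenize.generate_tokens(io.StringIO(source).readline):
        # STRING also matches ordinary string literals, not only docstrings,
        # which is good enough for an illustration.
        if tok.type not in (tokenize.COMMENT, tokenize.STRING):
            continue
        for lineno in range(tok.start[0], tok.end[0] + 1):
            line = lines[lineno - 1]
            if len(line) > MAX_DOC_LENGTH and "noqa" not in line:
                yield lineno, line

# Example (hypothetical usage): list the offending doc lines of a file before re-wrapping it.
# print(list(overlong_doc_lines(open("rasa/model.py").read())))
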
6 changes: 3 additions & 3 deletions rasa/core/actions/forms.py
@@ -389,8 +389,8 @@ async def validate_slots(
them. Otherwise there is no validation.
Args:
slot_candidates: Extracted slots which are candidates to fill the slots required
by the form.
slot_candidates: Extracted slots which are candidates to fill the slots
required by the form.
tracker: The current conversation tracker.
domain: The current model domain.
output_channel: The output channel which can be used to send messages
@@ -605,7 +605,7 @@ async def _ask_for_slot(
# helpers
@staticmethod
def _to_list(x: Optional[Any]) -> List[Any]:
"""Convert object to a list if it is not a list, `None` converted to empty list."""
"""Convert object to a list if it isn't."""
if x is None:
x = []
elif not isinstance(x, list):
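
For context, this is roughly what the re-documented _to_list helper looks like in full; the hunk above cuts off after the isinstance check, so the final lines are an assumption, and it is written as a plain function rather than a staticmethod to keep the sketch self-contained:

from typing import Any, List, Optional

def _to_list(x: Optional[Any]) -> List[Any]:
    """Convert object to a list if it isn't; `None` becomes an empty list."""
    if x is None:
        x = []
    elif not isinstance(x, list):
        x = [x]  # assumed: single values get wrapped (the hunk is truncated here)
    return x

assert _to_list(None) == []
assert _to_list("requested_slot") == ["requested_slot"]
assert _to_list(["a", "b"]) == ["a", "b"]
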
8 changes: 5 additions & 3 deletions rasa/core/channels/hangouts.py
@@ -108,7 +108,8 @@ async def _persist_message(self, message: Dict) -> None:
msg_new = "cards"
else:
raise Exception(
"Your message to Hangouts channel must either contain 'text' or 'cards'!"
"Your message to Hangouts channel must either contain 'text' or "
"'cards'!"
)

# depending on above outcome, convert messages into same type and combine
@@ -248,7 +249,7 @@ def _extract_input_channel(self) -> Text:
return self.name()

def _check_token(self, bot_token: Text) -> None:
# see https://developers.google.com/hangouts/chat/how-tos/bots-develop#verifying_bot_authenticity
# see https://developers.google.com/hangouts/chat/how-tos/bots-develop#verifying_bot_authenticity # noqa: W505
try:
token = client.verify_id_token(
bot_token, self.project_id, cert_uri=CERT_URI
@@ -301,7 +302,8 @@ async def receive(request: Request) -> HTTPResponse:
)
except Exception as e:
logger.exception(
f"An exception occurred while handling user message: {e}, text: {text}"
f"An exception occurred while handling user message: {e}, "
f"text: {text}"
)

return response.json(collector.messages)
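
The first hangouts.py hunk re-wraps the error raised when an outgoing message carries neither plain text nor cards. A minimal sketch of that check (the function name and surrounding logic are assumptions, not the channel's actual code):

from typing import Any, Dict

def hangouts_message_kind(message: Dict[str, Any]) -> str:
    """Return which kind of payload a Hangouts message carries."""
    if "text" in message:
        return "text"
    if "cards" in message:
        return "cards"
    raise Exception(
        "Your message to Hangouts channel must either contain 'text' or "
        "'cards'!"
    )

assert hangouts_message_kind({"text": "hi"}) == "text"
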
3 changes: 1 addition & 2 deletions rasa/core/channels/mattermost.py
@@ -98,9 +98,8 @@ async def send_text_with_buttons(
**kwargs: Any,
) -> None:
"""Sends buttons to the output."""

# buttons are a list of objects: [(option_name, payload)]
# See https://docs.mattermost.com/developer/interactive-messages.html#message-buttons
# See https://docs.mattermost.com/developer/interactive-messages.html#message-buttons # noqa: W505

actions = [
{
14 changes: 9 additions & 5 deletions rasa/core/channels/slack.py
@@ -383,18 +383,21 @@ async def process_message(
return response.text("")

def get_metadata(self, request: Request) -> Dict[Text, Any]:
"""Extracts the metadata from a slack API event (https://api.slack.com/types/event).
"""Extracts the metadata from a slack API event.
Slack Documentation: https://api.slack.com/types/event
Args:
request: A `Request` object that contains a slack API event in the body.
Returns:
Metadata extracted from the sent event payload. This includes the output channel for the response,
and users that have installed the bot.
Metadata extracted from the sent event payload. This includes the output
channel for the response, and users that have installed the bot.
"""
content_type = request.headers.get("content-type")

# Slack API sends either a JSON-encoded or a URL-encoded body depending on the content
# Slack API sends either a JSON-encoded or a URL-encoded body depending on the
# content
if content_type == "application/json":
# if JSON-encoded message is received
slack_event = request.json
@@ -544,7 +547,8 @@ async def webhook(request: Request) -> HTTPResponse:
request, on_new_message, text, sender_id, metadata
)
if payload["actions"][0]["type"] == "button":
# link buttons don't have "value", don't send their clicks to bot
# link buttons don't have "value", don't send their clicks to
# bot
return response.text("User clicked link button")
return response.text(
"The input message could not be processed.",
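
The get_metadata hunk above relies on Slack delivering events either JSON-encoded or URL-encoded, depending on the request's content type. A minimal, framework-agnostic sketch of that branching (the SimpleRequest stand-in and its fields are assumptions, not Rasa's or Sanic's actual classes, and form values are simplified to plain strings):

import json
from dataclasses import dataclass, field
from typing import Any, Dict

@dataclass
class SimpleRequest:
    headers: Dict[str, str]
    body: str = ""
    form: Dict[str, str] = field(default_factory=dict)

def slack_event_from(request: SimpleRequest) -> Dict[str, Any]:
    # Slack sends either a JSON-encoded body (Events API) or a URL-encoded
    # body with a JSON string under "payload" (interactive components).
    if request.headers.get("content-type") == "application/json":
        return json.loads(request.body)
    return json.loads(request.form.get("payload", "{}"))

event = slack_event_from(
    SimpleRequest(headers={"content-type": "application/json"},
                  body='{"type": "event_callback"}')
)
assert event["type"] == "event_callback"
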
3 changes: 2 additions & 1 deletion rasa/core/interpreter.py
@@ -151,7 +151,8 @@ def featurize_message(self, message: Message) -> Optional[Message]:
Args:
message: storing text to process
Returns:
message containing tokens and features which are the output of the NLU pipeline
message containing tokens and features which are the output of the NLU
pipeline
"""
if self.lazy_init and self.interpreter is None:
self._load_interpreter()
3 changes: 2 additions & 1 deletion rasa/core/policies/ted_policy.py
@@ -838,7 +838,8 @@ def load(
feature_name: features
for feature_name, features in model_data_example.items()
if feature_name
# we need to remove label features for prediction if they are present
# we need to remove label features for prediction if they are
# present
in PREDICTION_FEATURES
},
)
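
The ted_policy.py hunk only re-wraps a comment inside a dict comprehension; spelled out on its own, the step it documents is a simple filter (the feature names below are placeholders, not TED's real keys):

PREDICTION_FEATURES = {"text", "intent", "action_name"}  # placeholder names

model_data_example = {
    "text": ["..."],
    "intent": ["..."],
    "label_action_name": ["..."],  # label features are not needed for prediction
}

# keep only the features that are present at prediction time
prediction_data = {
    feature_name: features
    for feature_name, features in model_data_example.items()
    if feature_name in PREDICTION_FEATURES
}

assert "label_action_name" not in prediction_data
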
16 changes: 10 additions & 6 deletions rasa/core/training/story_conflict.py
@@ -237,7 +237,8 @@ def _find_conflicting_states(
tokenizer: A tokenizer to tokenize the user messages.
Returns:
A dictionary mapping state-hashes to a list of actions that follow from each state.
A dictionary mapping state-hashes to a list of actions that follow from each
state.
"""
# Create a 'state -> list of actions' dict, where the state is
# represented by its hash
@@ -269,8 +270,8 @@ def _build_conflicts_from_states(
trackers: Trackers that contain the states.
domain: The domain object.
max_history: Number of turns to take into account for the state descriptions.
conflicting_state_action_mapping: A dictionary mapping state-hashes to a list of actions
that follow from each state.
conflicting_state_action_mapping: A dictionary mapping state-hashes to a list
of actions that follow from each state.
tokenizer: A tokenizer to tokenize the user messages.
Returns:
@@ -331,7 +332,8 @@ def _sliced_states_iterator(
)
if tokenizer:
_apply_tokenizer_to_states(tokenizer, sliced_states)
# ToDo: deal with oov (different tokens can lead to identical features if some of those tokens are out of vocabulary for all featurizers)
# TODO: deal with oov (different tokens can lead to identical features
# if some of those tokens are out of vocabulary for all featurizers)
yield TrackerEventStateTuple(tracker, event, sliced_states)
idx += 1

@@ -395,9 +397,11 @@ def _get_previous_event(
previous_event_name = state[USER]["text"]

if not isinstance(previous_event_name, (str, type(None))):
# While the Substate type doesn't restrict the value of `action_text` / `intent`, etc. to be a string, it always should be
# While the Substate type doesn't restrict the value of `action_text` /
# `intent`, etc. to be a string, it always should be
raise TypeError(
f"The value '{previous_event_name}' in the substate should be a string or None, not {type(previous_event_name)}. Did you modify Rasa source code?"
f"The value '{previous_event_name}' in the substate should be a string or "
f"None, not {type(previous_event_name)}. Did you modify Rasa source code?"
)

return previous_event_type, previous_event_name
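
Two of the story_conflict.py hunks above describe building a 'state -> list of actions' mapping keyed by a state hash and flagging states that are followed by more than one distinct action. A stripped-down sketch of that idea (the state and action representation is invented for illustration):

from collections import defaultdict
from typing import Dict, List, Tuple

def conflicting_state_action_mapping(
    state_action_pairs: List[Tuple[str, str]]
) -> Dict[int, List[str]]:
    """Map state hashes to the actions that follow them, keeping only conflicts."""
    state_actions: Dict[int, List[str]] = defaultdict(list)
    for state, action in state_action_pairs:
        state_actions[hash(state)].append(action)
    # a state is conflicting if more than one distinct action can follow it
    return {
        state_hash: actions
        for state_hash, actions in state_actions.items()
        if len(set(actions)) > 1
    }

pairs = [("greet", "utter_greet"), ("greet", "utter_goodbye"), ("bye", "utter_goodbye")]
assert len(conflicting_state_action_mapping(pairs)) == 1
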
8 changes: 4 additions & 4 deletions rasa/model.py
@@ -455,12 +455,12 @@ def should_retrain(
old_model: Path to the old zipped model file.
train_path: Path to the directory in which the new model will be trained.
has_e2e_examples: Whether the new training data contains e2e examples.
force_training: Indicates if the model needs to be retrained even if the data has not changed.
force_training: Indicates if the model needs to be retrained even if the data
has not changed.
Returns:
A FingerprintComparisonResult object indicating whether Rasa Core and/or Rasa NLU needs
to be retrained or not.
A FingerprintComparisonResult object indicating whether Rasa Core and/or Rasa
NLU needs to be retrained or not.
"""
fingerprint_comparison = FingerprintComparisonResult()

18 changes: 12 additions & 6 deletions rasa/nlu/components.py
@@ -570,7 +570,8 @@ def train(
of components previous to this one.
Args:
training_data: The :class:`rasa.shared.nlu.training_data.training_data.TrainingData`.
training_data: The
:class:`rasa.shared.nlu.training_data.training_data.TrainingData`.
config: The model configuration parameters.
"""
pass
@@ -588,7 +589,8 @@ def process(self, message: Message, **kwargs: Any) -> None:
of components previous to this one.
Args:
message: The :class:`rasa.shared.nlu.training_data.message.Message` to process.
message: The :class:`rasa.shared.nlu.training_data.message.Message` to
process.
"""
pass

@@ -693,7 +695,8 @@ def can_handle_language(cls, language: Hashable) -> bool:
`True` if component can handle specific language, `False` otherwise.
"""

# If both `supported_language_list` and `not_supported_language_list` are set to `None`,
# If both `supported_language_list` and `not_supported_language_list` are set
# to `None`,
# it means: support all languages
if language is None or (
cls.supported_language_list is None
@@ -703,9 +706,11 @@ def can_handle_language(cls, language: Hashable) -> bool:

# check language supporting settings
if cls.supported_language_list and cls.not_supported_language_list:
# When user set both language supporting settings to not None, it will lead to ambiguity.
# When user set both language supporting settings to not None, it will lead
# to ambiguity.
raise RasaException(
"Only one of `supported_language_list` and `not_supported_language_list` can be set to not None"
"Only one of `supported_language_list` and"
"`not_supported_language_list` can be set to not None"
)

# convert to `list` for membership test
@@ -723,7 +728,8 @@ def can_handle_language(cls, language: Hashable) -> bool:
# check if user provided a valid setting
if not supported_language_list and not not_supported_language_list:
# One of language settings must be valid (not None and not a empty list),
# There are three combinations of settings are not valid: (None, []), ([], None) and ([], [])
# There are three combinations of settings are not valid:
# (None, []), ([], None) and ([], [])
raise RasaException(
"Empty lists for both "
"`supported_language_list` and `not_supported language_list` "
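
The can_handle_language hunks spell out the rules for the two language settings: both None means 'support every language', both set at once is ambiguous and raises, both effectively empty is invalid and raises, and otherwise a plain membership test decides. A standalone sketch of those rules (simplified, not the actual Rasa implementation):

from typing import Hashable, List, Optional

class RasaException(Exception):
    pass

def can_handle_language(
    language: Optional[Hashable],
    supported: Optional[List[Hashable]] = None,
    not_supported: Optional[List[Hashable]] = None,
) -> bool:
    # no language given, or both settings left as None -> support all languages
    if language is None or (supported is None and not_supported is None):
        return True
    if supported and not_supported:
        # setting both lists at once is ambiguous
        raise RasaException(
            "Only one of `supported_language_list` and "
            "`not_supported_language_list` can be set to not None"
        )
    supported = supported or []
    not_supported = not_supported or []
    if not supported and not not_supported:
        # (None, []), ([], None) and ([], []) are all invalid combinations
        raise RasaException(
            "Empty lists for both `supported_language_list` and "
            "`not_supported_language_list` are not a valid setting."
        )
    if supported:
        return language in supported
    return language not in not_supported

assert can_handle_language("en", supported=["en", "de"])
assert not can_handle_language("fr", not_supported=["fr"])
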
3 changes: 2 additions & 1 deletion rasa/nlu/emulators/dialogflow.py
@@ -15,8 +15,9 @@


class DialogflowEmulator(Emulator):
"""Emulates the response format of the DialogFlow projects.agent.environments.users.sessions.detectIntent
"""Emulates the response format of the DialogFlow.
# noqa: W505
https://cloud.google.com/dialogflow/es/docs/reference/rest/v2/projects.agent.environments.users.sessions/detectIntent
https://cloud.google.com/dialogflow/es/docs/reference/rest/v2/DetectIntentResponse
"""
3 changes: 2 additions & 1 deletion rasa/nlu/emulators/wit.py
@@ -17,7 +17,8 @@
class WitEmulator(Emulator):
"""Emulates the response format of this wit.ai endpoint.
More information about the endpoint: https://wit.ai/docs/http/20200513/#get__message_link
More information about the endpoint:
https://wit.ai/docs/http/20200513/#get__message_link
"""

def normalise_response_json(self, data: Dict[Text, Any]) -> Dict[Text, Any]:
18 changes: 12 additions & 6 deletions rasa/nlu/extractors/extractor.py
@@ -335,10 +335,14 @@ def _check_is_single_entity(
return True

# Tokens need to be no further than 3 positions apart
# The magic number 3 is chosen such that the following two cases can be extracted
# - Schönhauser Allee 175, 10119 Berlin (address compounds separated by 2 tokens (", "))
# - 22 Powderhall Rd., EH7 4GB (abbreviated "Rd." results in a separation of 3 tokens ("., "))
# More than 3 might already introduce cases that shouldn't be considered by this logic
# The magic number 3 is chosen such that the following two cases can be
# extracted
# - Schönhauser Allee 175, 10119 Berlin
# (address compounds separated by 2 tokens (", "))
# - 22 Powderhall Rd., EH7 4GB
# (abbreviated "Rd." results in a separation of 3 tokens ("., "))
# More than 3 might already introduce cases that shouldn't be considered by
# this logic
tokens_within_range = token.start - last_token_end <= 3

# The interleaving tokens *must* be a full stop, a comma, or a whitespace
@@ -469,9 +473,11 @@ def check_correct_entity_annotations(training_data: TrainingData) -> None:
f"with intent '{example.get(INTENT)}'. Make sure the start and "
f"end values of entities ({entities_repr}) in the training "
f"data match the token boundaries ({tokens_repr}). "
"Common causes: \n 1) entities include trailing whitespaces or punctuation"
"Common causes: \n 1) entities include trailing whitespaces "
"or punctuation"
"\n 2) the tokenizer gives an unexpected result, due to "
"languages such as Chinese that don't use whitespace for word separation",
"languages such as Chinese that don't use whitespace for word "
"separation",
docs=DOCS_URL_TRAINING_DATA_NLU,
)
break
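
The _check_is_single_entity hunk explains the 'magic number 3': two tokens still count as one entity if they are at most three character positions apart and the gap between them contains only full stops, commas, or whitespace. A rough, self-contained approximation of that check (not the full extractor logic):

import re

def looks_like_single_entity(last_token_end: int, next_token_start: int, gap_text: str) -> bool:
    """Treat two tokens as one entity if the gap is short and only '.', ',' or spaces."""
    # 3 still covers "Allee 175, 10119" (gap ", ") and "Powderhall Rd., EH7" (gap "., ")
    tokens_within_range = next_token_start - last_token_end <= 3
    gap_is_punctuation = re.fullmatch(r"[.,\s]*", gap_text) is not None
    return tokens_within_range and gap_is_punctuation

assert looks_like_single_entity(20, 22, ", ")                  # address compound
assert not looks_like_single_entity(20, 30, " -- some words -- ")
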
2 changes: 1 addition & 1 deletion rasa/nlu/extractors/regex_entity_extractor.py
@@ -42,7 +42,7 @@ def __init__(
component_config: Optional[Dict[Text, Any]] = None,
patterns: Optional[List[Dict[Text, Text]]] = None,
):
"""Extracts entities using the lookup tables and/or regexes defined in the training data."""
"""Extracts entities using the lookup tables and/or regexes defined."""
super(RegexEntityExtractor, self).__init__(component_config)

self.case_sensitive = self.component_config["case_sensitive"]
7 changes: 4 additions & 3 deletions rasa/nlu/featurizers/dense_featurizer/lm_featurizer.py
@@ -348,9 +348,10 @@ def _tokenize_example(
split_token_ids, split_token_strings = self._lm_tokenize(token.text)

if not split_token_ids:
# fix the situation that `token.text` only contains whitespace or other special characters,
# which cause `split_token_ids` and `split_token_strings` be empty,
# finally cause `self._lm_specific_token_cleanup()` to raise an exception
# fix the situation that `token.text` only contains whitespace or other
# special characters, which cause `split_token_ids` and
# `split_token_strings` be empty, finally cause
# `self._lm_specific_token_cleanup()` to raise an exception
continue

(split_token_ids, split_token_strings) = self._lm_specific_token_cleanup(
25 changes: 16 additions & 9 deletions rasa/nlu/featurizers/sparse_featurizer/count_vectors_featurizer.py
@@ -370,14 +370,17 @@ def _update_vectorizer_vocabulary(
Args:
attribute: Message attribute for which vocabulary should be updated.
new_vocabulary: Set of words to expand the vocabulary with if they are unseen.
new_vocabulary: Set of words to expand the vocabulary with if they are
unseen.
"""
existing_vocabulary: Dict[Text, int] = self.vectorizers[attribute].vocabulary
if len(new_vocabulary) > len(existing_vocabulary):
rasa.shared.utils.io.raise_warning(
f"New data contains vocabulary of size {len(new_vocabulary)} for attribute {attribute} "
f"which is larger than the maximum vocabulary size({len(existing_vocabulary)}) "
f"of the original model. Some tokens will have to be dropped "
f"New data contains vocabulary of size {len(new_vocabulary)} for "
f"attribute {attribute} "
f"which is larger than the maximum vocabulary size "
f"({len(existing_vocabulary)}) of the original model. "
f"Some tokens will have to be dropped "
f"in order to continue training. It is advised to re-train the "
f"model from scratch on the complete data."
)
@@ -410,11 +413,14 @@ def _get_additional_vocabulary_size(
current vocabulary size.
Args:
attribute: Message attribute for which additional vocabulary size should be computed.
existing_vocabulary_size: Current size of vocabulary learnt from the training data.
attribute: Message attribute for which additional vocabulary size should
be computed.
existing_vocabulary_size: Current size of vocabulary learnt from the
training data.
Returns:
Size of additional vocabulary that should be set aside for incremental training.
Size of additional vocabulary that should be set aside for incremental
training.
"""
# Vocabulary expansion for INTENTS, ACTION_NAME
# and INTENT_RESPONSE_KEY is currently not supported as
@@ -442,7 +448,8 @@ def _add_buffer_to_vocabulary(self, attribute: Text) -> None:
So for example - buf_1, buf_2, buf_3... and so on.
Args:
attribute: Name of the attribute for which the vocabulary should be expanded.
attribute: Name of the attribute for which the vocabulary should be
expanded.
"""
original_vocabulary = self.vectorizers[attribute].vocabulary_
current_vocabulary_size = len(original_vocabulary)
@@ -552,7 +559,7 @@ def _train_with_independent_vocab(
)

def _log_vocabulary_stats(self, attribute: Text) -> None:
"""Logs number of vocabulary slots filled out of the total number of available slots.
"""Logs number of vocabulary slots filled out of the total available ones.
Args:
attribute: Message attribute for which vocabulary stats are logged.
@@ -166,8 +166,8 @@ def _create_sparse_features(self, message: Message) -> None:
import scipy.sparse

tokens = message.get(TOKENS_NAMES[TEXT])
# this check is required because there might be training data examples without TEXT,
# e.g., `Message("", {action_name: "action_listen"})`
# this check is required because there might be training data examples without
# TEXT, e.g., `Message("", {action_name: "action_listen"})`
if tokens:

sentence_features = self._tokens_to_features(tokens)