# Source: GH-300-GitHub-Copilot-Demos/data_processor.py (main branch, SQLtattoo/GH-300-GitHub-Copilot-Demos on GitHub)
"""
Data processor module for string and list operations.
Demonstrates Copilot's ability to work with data structures.
"""

import ast
import re


class DataProcessor:
    """Collection of small string, list and dictionary helpers.

    The instance keeps a running ``processed_count``.  It is incremented by
    ``reverse_string``, ``remove_duplicates`` and ``chunk_list``; the other
    methods leave it untouched.
    """

    def __init__(self):
        """Initialize the data processor with a zeroed operation counter."""
        self.processed_count = 0

    def reverse_string(self, text: str) -> str:
        """Return ``text`` with its characters in reverse order."""
        self.processed_count += 1
        return text[::-1]

    # TODO: Create a method that checks if a string is a palindrome
    # Should ignore case and spaces

    # TODO: Create a method that counts the number of vowels in a string
    # Should handle both uppercase and lowercase

    def remove_duplicates(self, items: list) -> list:
        """Remove duplicate items from a list while preserving order.

        Args:
            items (list): Hashable items, possibly containing repeats

        Returns:
            list: New list holding the first occurrence of each item

        Raises:
            TypeError: If an item is not hashable
        """
        self.processed_count += 1
        # dict keys are unique and keep insertion order, so this keeps the
        # first occurrence of every item in its original position.
        return list(dict.fromkeys(items))

    # TODO: Create a method called 'find_common_elements' that finds common elements between two lists

    # TODO: Create a method that sorts a list of dictionaries by a specified key

    def chunk_list(self, items: list, chunk_size: int) -> list:
        """Split a list into consecutive chunks of at most ``chunk_size`` items.

        Args:
            items (list): Sequence to split
            chunk_size (int): Maximum length of each chunk; must be positive

        Returns:
            list: List of slices of ``items``; the last one may be shorter

        Raises:
            ValueError: If ``chunk_size`` is zero or negative
        """
        if chunk_size <= 0:
            raise ValueError("Chunk size must be positive")
        self.processed_count += 1
        return [items[i:i + chunk_size] for i in range(0, len(items), chunk_size)]

    # TODO: Create a method that flattens a nested list structure
    # Example: [[1, 2], [3, [4, 5]]] -> [1, 2, 3, 4, 5]

    def get_processed_count(self) -> int:
        """Return the number of operations processed."""
        return self.processed_count

    def count_words(self, text: str) -> int:
        """Count whitespace-separated words in a text.

        Args:
            text (str): Text to analyze

        Returns:
            int: Number of words; 0 for an empty or whitespace-only string
        """
        # split() with no argument collapses runs of whitespace and drops
        # leading/trailing whitespace, so no empty "words" are counted.
        return len(text.split())

    def find_duplicates_slow(self, items: list) -> list:
        """Find the items that occur more than once in a list.

        Each duplicated value is reported once, in order of its first
        occurrence.  The method name is kept for backward compatibility;
        hashable input is handled in a single counting pass.

        Args:
            items (list): Items to inspect

        Returns:
            list: Values that appear at least twice in ``items``
        """
        try:
            counts = {}
            for item in items:
                counts[item] = counts.get(item, 0) + 1
        except TypeError:
            # Unhashable items (e.g. nested lists) cannot be dict keys; fall
            # back to the pairwise equality scan, which is O(n^2).
            duplicates = []
            for i, item in enumerate(items):
                for j, other in enumerate(items):
                    if i != j and item == other and item not in duplicates:
                        duplicates.append(item)
            return duplicates
        # Keys are stored in first-occurrence order, matching the scan above.
        return [item for item, count in counts.items() if count > 1]

    def process_data(self, data: list) -> list:
        """
        Process data by doubling each element.

        Each element is multiplied by 2, so any value that supports ``* 2``
        is accepted (sequences such as strings are repeated).

        Args:
            data (list): Iterable of values to process

        Returns:
            list: New list with each element doubled

        Raises:
            TypeError: If data is not iterable or an element does not
                support multiplication

        Example:
            >>> processor = DataProcessor()
            >>> processor.process_data([1, 2, 3])
            [2, 4, 6]
        """
        if not hasattr(data, '__iter__'):
            raise TypeError("Data must be iterable")
        result = []
        for x in data:
            try:
                result.append(x * 2)
            except TypeError as err:
                # Chain the original error so the offending operand types
                # stay visible in the traceback.
                raise TypeError("All elements must support multiplication") from err
        return result

    def calculate_expression(self, expression: str) -> float:
        """
        Calculate a mathematical expression from string.

        Only supports basic arithmetic operations (+, -, *, /, parentheses).
        The expression is parsed with ``ast`` and evaluated node by node
        instead of being passed to ``eval``, to prevent code injection.

        Args:
            expression (str): Mathematical expression to evaluate

        Returns:
            float: Result of the calculation (an ``int`` when every operand
                is an integer and no division is involved)

        Raises:
            ValueError: If expression contains invalid characters or syntax,
                uses an unsupported operator, or divides by zero
        """
        # Remove spaces
        expression = expression.replace(' ', '')

        # Validate expression contains only safe characters
        if not re.fullmatch(r'[0-9+\-*/().\s]+', expression):
            raise ValueError(
                "Expression contains invalid characters. "
                "Only numbers and +, -, *, /, () are allowed."
            )

        try:
            tree = ast.parse(expression, mode='eval')
            return self._eval_expr(tree.body)
        except ValueError:
            # Already the documented error type (raised by _eval_expr).
            raise
        except Exception as e:
            # SyntaxError and any other parse/evaluation failure are reported
            # as ValueError, with the original exception chained.
            raise ValueError(f"Invalid expression: {e}") from e

    def _eval_expr(self, node):
        """
        Safely evaluate an AST node containing only arithmetic operations.

        Numeric literals are read from ``ast.Constant`` (Python 3.8+); the
        deprecated ``ast.Num`` alias is not used.

        Args:
            node: AST node to evaluate (ast.Constant, ast.BinOp or ast.UnaryOp)

        Returns:
            int or float: The evaluated result of the expression

        Raises:
            ValueError: If node contains unsupported operations, a
                non-numeric constant, or a division by zero
        """
        if isinstance(node, ast.Constant):  # <number>
            value = node.value
            # bool is a subclass of int, so it has to be excluded explicitly.
            if isinstance(value, bool) or not isinstance(value, (int, float)):
                raise ValueError(f"Unsupported constant: {value!r}")
            return value
        elif isinstance(node, ast.BinOp):  # <left> <operator> <right>
            left = self._eval_expr(node.left)
            right = self._eval_expr(node.right)
            if isinstance(node.op, ast.Add):
                return left + right
            elif isinstance(node.op, ast.Sub):
                return left - right
            elif isinstance(node.op, ast.Mult):
                return left * right
            elif isinstance(node.op, ast.Div):
                if right == 0:
                    raise ValueError("Division by zero")
                return left / right
            else:
                raise ValueError(f"Unsupported operation: {type(node.op).__name__}")
        elif isinstance(node, ast.UnaryOp):  # <operator> <operand> e.g., -1
            operand = self._eval_expr(node.operand)
            if isinstance(node.op, ast.USub):
                return -operand
            elif isinstance(node.op, ast.UAdd):
                return +operand
            else:
                raise ValueError(f"Unsupported unary operation: {type(node.op).__name__}")
        else:
            raise ValueError(f"Unsupported expression type: {type(node).__name__}")

    def get_last_n_items(self, items: list, n: int) -> list:
        """Get the last n items from a list.

        Args:
            items (list): Sequence to take items from
            n (int): Number of trailing items wanted; must not be negative

        Returns:
            list: The last ``n`` items; empty when ``n`` is 0, and a copy of
                all items when ``n`` exceeds ``len(items)``

        Raises:
            ValueError: If ``n`` is negative
        """
        if n < 0:
            raise ValueError("n must be non-negative")
        # items[-n:] would return the whole list for n == 0, so compute the
        # start index explicitly and clamp it at 0.
        return items[max(len(items) - n, 0):]

    def merge_dictionaries(self, dict1: dict, dict2: dict) -> dict:
        """
        Merge two dictionaries, with dict2 values overwriting dict1 on key conflicts.

        Creates a new dictionary containing all keys from both input dictionaries.
        If a key exists in both dictionaries, the value from dict2 takes precedence.
        Neither input is modified.

        Args:
            dict1 (dict): First dictionary to merge
            dict2 (dict): Second dictionary to merge (takes precedence on conflicts)

        Returns:
            dict: New dictionary containing merged key-value pairs

        Example:
            >>> processor = DataProcessor()
            >>> d1 = {'a': 1, 'b': 2}
            >>> d2 = {'b': 3, 'c': 4}
            >>> processor.merge_dictionaries(d1, d2)
            {'a': 1, 'b': 3, 'c': 4}
        """
        result = dict1.copy()
        result.update(dict2)
        return result


def to_title_case(text: str) -> str:
    """
    Convert a string to title case.

    Capitalizes the first letter of each word and converts all other letters to lowercase.
    An apostrophe inside a word (contractions, possessives) does not start a
    new word, so "they're" becomes "They're" rather than "They'Re".

    Args:
        text (str): The string to convert to title case

    Returns:
        str: The string converted to title case

    Example:
        >>> to_title_case("hello world")
        'Hello World'
        >>> to_title_case("HELLO world")
        'Hello World'
        >>> to_title_case("they're here")
        "They're Here"
    """
    titled = text.title()
    # str.title() treats every non-letter, including an apostrophe, as a word
    # boundary.  Undo the capital it puts after an apostrophe that directly
    # follows a letter; [^\W\d_] matches any Unicode letter.
    return re.sub(
        r"(?<=[^\W\d_])'([^\W\d_]+)",
        lambda match: "'" + match.group(1).lower(),
        titled,
    )


def find_most_frequent(items: list):
    """
    Find the most frequent element in a list.

    If there's a tie, returns the first one encountered.  The input is
    traversed only once, so one-shot iterables (iterators, generators) are
    handled as well as lists.

    Args:
        items (list): Hashable items to analyze

    Returns:
        The most frequent element in the list

    Raises:
        ValueError: If the list is empty
        TypeError: If an item is not hashable

    Example:
        >>> find_most_frequent([1, 2, 2, 3, 3, 3])
        3
        >>> find_most_frequent(['a', 'b', 'a', 'c'])
        'a'
    """
    if not items:
        raise ValueError("Cannot find most frequent element in empty list")

    frequency = {}
    for item in items:
        frequency[item] = frequency.get(item, 0) + 1

    # An exhausted iterator is truthy, so it passes the check above; detect
    # the empty case from what was actually counted.
    if not frequency:
        raise ValueError("Cannot find most frequent element in empty list")

    # Keys are kept in first-occurrence order and max() returns the first
    # maximal key, which gives the "first one encountered" tie-break.
    return max(frequency, key=frequency.get)