Skip to content

getml.pipeline.SQLCode

SQLCode(
    code: Sequence[Union[str, SQLString]],
    dialect: str = sqlite3,
)

Custom class for handling the SQL code of the features generated by the pipeline.

PARAMETER DESCRIPTION
code

The SQL code of the features.

TYPE: Sequence[Union[str, SQLString]]

dialect

The SQL dialect used in the code. Default is 'sqlite3'.

TYPE: str DEFAULT: sqlite3

Example
sql_code = my_pipeline.features.to_sql()

# You can access individual features
# by index.
feature_1_1 = sql_code[0]

# You can also access them by name.
feature_1_10 = sql_code["FEATURE_1_10"]

# You can also type the name of
# a table or column to find all
# features related to that table
# or column.
features = sql_code.find("SOME_TABLE")

# HINT: The generated SQL code always
# escapes table and column names using
# quotation marks. So if you want exact
# matching, you can do this:
features = sql_code.find('"SOME_TABLE"')
Source code in getml/pipeline/sql_code.py
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
def __init__(
    self,
    code: Sequence[Union[str, SQLString]],
    dialect: str = sqlite3,
) -> None:
    """
    Builds the container from a collection of SQL snippets.

    Args:
        code:
            The SQL code of the features. It is validated with
            `_is_typed_list(code, str)` before anything is stored.

        dialect:
            The SQL dialect used in the code.

    Raises:
        TypeError: If `code` does not pass the typed-list check.
    """
    code_is_valid = _is_typed_list(code, str)
    if not code_is_valid:
        raise TypeError("'code' must be a list of str.")

    # Every snippet is wrapped in SQLString, whatever type it came in as.
    self.code = list(map(SQLString, code))

    self.dialect = dialect

    # Table names are collected from the concatenation of all snippets
    # using the dialect-specific pattern, then normalised one by one.
    joined_code = "".join(code)
    raw_table_names = re.findall(_table_pattern(self.dialect), joined_code)
    self.tables = [_edit_table_name(raw_name) for raw_name in raw_table_names]

find

find(keyword: str) -> SQLCode

Returns the SQLCode for all features containing the keyword.

PARAMETER DESCRIPTION
keyword

The keyword to be found.

TYPE: str

RETURNS DESCRIPTION
SQLCode

The SQL code for all features containing the keyword.

Source code in getml/pipeline/sql_code.py
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
def find(self, keyword: str) -> SQLCode:
    """
    Filters the features by a keyword.

    Args:
        keyword: The keyword to be found.

    Returns:
        A new SQLCode, using the same dialect, that holds only the
        features whose code contains the keyword.

    Raises:
        TypeError: If `keyword` is not a str.
    """
    if isinstance(keyword, str):
        # Plain substring test against each stored snippet.
        matching = [snippet for snippet in self.code if keyword in snippet]
        return SQLCode(matching, self.dialect)

    raise TypeError("'keyword' must be a str.")

save

save(
    fname: str, split: bool = True, remove: bool = False
) -> None

Saves the SQL code to a file.

PARAMETER DESCRIPTION
fname

The name of the file or folder (if split==True) in which you want to save the features.

TYPE: str

split

If True, the code will be split into multiple files, one for each feature, and saved into the folder fname.

TYPE: bool DEFAULT: True

remove

If True, any existing SQL files in the folder fname that were previously generated by the save method will be removed.

TYPE: bool DEFAULT: False

Source code in getml/pipeline/sql_code.py
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
def save(self, fname: str, split: bool = True, remove: bool = False) -> None:
    """
    Saves the SQL code to a file.

    Args:
        fname:
            The name of the file (if `split==False`) or folder
            (if `split==True`) in which you want to save the features.

        split:
            If True, the code will be split into multiple files, one for
            each feature, and saved into the folder `fname`. The folder
            and any missing parent folders are created if necessary.

        remove:
            If True, files in the folder `fname` whose names match the
            naming pattern used by this method are deleted before the
            new files are written. If False and such files exist, their
            names are printed and nothing is written.
    """
    if not split:
        with open(fname, "w", encoding="utf-8") as sqlfile:
            sqlfile.write(str(self))
        return

    directory = Path(fname)

    if directory.exists():
        # Only files that look like earlier output of this method are
        # considered: four leading digits, an underscore somewhere in
        # the name and a ".sql" suffix.
        pattern = re.compile(r"^\d{4}.*\_.*\.sql$")
        exist_files_path = sorted(
            fp for fp in os.listdir(directory) if pattern.search(fp)
        )

        if exist_files_path and not remove:
            print(f"The following files already exist in the directory ({fname}):")
            for fp in exist_files_path:
                print(fp)
            print("Please set 'remove=True' to remove them.")
            return

        # Reaching this point with a non-empty list implies remove=True.
        for fp in exist_files_path:
            (directory / fp).unlink()

    # parents=True lets nested targets such as "out/sql" be created in one go.
    directory.mkdir(parents=True, exist_ok=True)

    for index, code in enumerate(self.code, 1):
        # The file name is derived from the first table-pattern match in
        # the snippet; snippets without a match fall back to "feature".
        match = re.search(_table_pattern(self.dialect), str(code))
        name = _edit_table_name(match.group(1).lower()) if match else "feature"
        name = _edit_windows_filename(name).replace(".", "_").replace("`", "")
        file_path = directory / f"{index:04d}_{name}.sql"
        with open(file_path, "w", encoding="utf-8") as sqlfile:
            sqlfile.write(str(code))

to_str

to_str() -> str

Returns a raw string representation of the SQL code.

RETURNS DESCRIPTION
str

A raw string representation of the SQL code.

Source code in getml/pipeline/sql_code.py
181
182
183
184
185
186
187
188
def to_str(self) -> str:
    """
    Converts the SQL code into a plain string.

    Returns:
        The result of calling `str` on this object.
    """
    raw_text = str(self)
    return raw_text