Skip to content

Documentation for ToMeDa

tomeda.t10_extract_dataverse_native_keys

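This script extracts keys from Dataverse TSV files. Its `main` function reads every `*.tsv` file in the first folder given in `tsv_dir`, extracts the keys with `extract_keys`, and writes them with `write_keys` to a file of the same name with the suffix `.keys` in the output folder.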

logger module-attribute

logger: TraceLogger = getLogger(__name__)

extract_keys

extract_keys(tsv_file: Path) -> list

Extracts keys from a given TSV file.

Parameters:

Name Type Description Default
tsv_file Path

The input TSV file path.

required

Returns:

Type Description
list

A list of keys.

Source code in tomeda/t10_extract_dataverse_native_keys.py
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
def extract_keys(tsv_file: Path) -> list:
    """
    Collect the values of the ``name`` column from a Dataverse TSV file.

    The first two entries returned by the file handler are discarded, so
    the third entry becomes the header row for the TSV parser. Rows are
    then read in order until one is found whose ``#datasetField`` column
    starts with ``#controlledVocabulary``; that row and everything after
    it are ignored.

    Parameters
    ----------
    tsv_file : Path
        The input TSV file path.

    Returns
    -------
    list
        The ``name`` value of every row read before the
        ``#controlledVocabulary`` marker (or of all rows when no such
        marker is present).
    """
    # NOTE(review): assumes ``read(strip=False)`` yields a sequence of
    # lines, so that ``[2:]`` drops the first two lines -- confirm against
    # TomedaFileHandler.
    lines = TomedaFileHandler(tsv_file).read(strip=False)[2:]

    names = []
    for record in csv.DictReader(lines, delimiter="\t"):
        # The marker row signals that no further keys follow.
        if record["#datasetField"].startswith("#controlledVocabulary"):
            return names
        names.append(record["name"])
    return names

main

main(param: TomedaParameter) -> None

Main function that extracts keys from Dataverse TSV files.

Source code in tomeda/t10_extract_dataverse_native_keys.py
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
def main(param: TomedaParameter) -> None:
    """
    Main function that extracts keys from Dataverse TSV files.

    Every ``*.tsv`` file directly inside the input folder is passed to
    ``extract_keys``; the result is written with ``write_keys`` to a file
    of the same name with the suffix ``.keys`` inside the output folder.

    If the input folder is not a directory, or the output folder is not a
    writable directory, a critical message is logged and the function
    returns without processing anything.

    Parameters
    ----------
    param : TomedaParameter
        Run configuration. The attributes read here are ``tsv_dir`` (only
        its first element is used as the input folder), ``output`` (the
        output folder) and ``force_overwrite`` (forwarded to
        ``write_keys``).
    """
    folder_path = Path(param.tsv_dir[0])
    output_path = Path(param.output)

    # Report through the module-level logger rather than the root
    # ``logging`` functions, so messages carry this module's name.
    if not folder_path.is_dir():
        logger.critical(f"Folder {folder_path} does not exist.")
        return

    if not output_path.is_dir() or not os.access(output_path, os.W_OK):
        logger.critical(
            f"Output folder {output_path} does not exist "
            f"or is not writable."
        )
        return

    # ``Path.glob`` yields entries in no guaranteed order; sorting makes
    # the processing order reproducible between runs.
    for tsv_file in sorted(folder_path.glob("*.tsv")):
        keys = extract_keys(tsv_file)
        key_file = output_path / tsv_file.with_suffix(".keys").name
        write_keys(keys, key_file, param.force_overwrite)

write_keys

write_keys(keys: list, file: Path, overwrite: bool) -> None

Writes keys to a given file.

Parameters:

Name Type Description Default
keys list

The list of keys to write.

required
file Path

The output file path.

required
overwrite bool

Whether to overwrite the output file if it exists.

required
Source code in tomeda/t10_extract_dataverse_native_keys.py
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
def write_keys(keys: list, file: Path, overwrite: bool) -> None:
    """
    Hand a list of keys to a ``TomedaFileHandler`` for writing.

    Parameters
    ----------
    keys : list
        The keys to be written.
    file : Path
        Path of the file the keys are written to.
    overwrite : bool
        Passed to the file handler as its ``overwrite`` option; intended
        to control whether an existing output file may be replaced.
    """
    # The handler is only needed for this single write, so it is not
    # bound to a local name.
    TomedaFileHandler(file, overwrite=overwrite).write(keys)