Friday, July 23, 2021

Assembly of Python External C++ procedure returning the value of string type

In the C++ procedure below we build the final answer as a C++ std::string, convert it with c_str() to a pointer (say c) to "const char", and finally return the required value to the Python runtime as the pointer to PyObject produced by PyUnicode_FromString(c).

    // char + std::string concatenation: the winning letter followed by its count.
    std::string answer = maxChar + std::to_string(maxFreq);
    // c_str() exposes the NUL-terminated buffer owned by `answer`.
    const char *ars = answer.c_str();
    // PyUnicode_FromString creates a new Python str object from the UTF-8
    // buffer; that object is what gets handed back to the Python caller.
    PyObject *result = PyUnicode_FromString(ars);
    return result;

The original task (from the "Eugene Djobs" Python online trainer) reads as follows:

The text file contains only capital letters of the Latin alphabet (ABC ... Z). Identify the most common character in the file immediately after the letter X. In the answer, write down this symbol first, and then immediately (no separator) how many times it occurs after the letter X.

Python Code for this task 

# Alphabet to scan. Same order as the `letters` string in test.cpp, so ties
# between equally frequent letters are resolved identically in both versions.
letters = "QWERTYUIOPASDFGHJKLZXCVBNM"

# count[i] = how many times letters[i] occurs immediately after 'X'.
# Kept outside the line loop so the totals cover the whole file, not just
# the last line read.
count = [0] * 26

with open('24-B251220.txt') as f:
    for line in f:
        # Same 26-pass scan that the C++ extension performs, so the run
        # times of the two implementations are comparable.
        for i in range(26):
            for j in range(1, len(line)):
                if line[j-1] == 'X' and line[j] == letters[i]:
                    count[i] += 1

# Pick the most frequent follower; the strict '<' keeps the first letter
# (in `letters` order) when several share the maximum.
maxChar, maxFreq = ' ', 0
for i in range(26):
    if maxFreq < count[i]:
        maxFreq = count[i]
        maxChar = letters[i]

# Required answer format: the symbol immediately followed by its count.
print(maxChar + str(maxFreq))


where 24-B251220.txt consists of a single row 1000000 bytes long.

If we append 24-B251220.txt to 24-C251220.txt 50 times in a loop (24-B251220.txt >> 24-C251220.txt), the Python module above takes a while (around 3.5 min) to finish on the resulting file.
Create the following test.cpp file for the Python-to-C++ extension:

#include <iostream>
#include <fstream>
#include <string>
#include <cstddef>
#include <Python.h>
#pragma GCC diagnostic ignored "-Wsign-compare"
#pragma GCC diagnostic ignored "-Wreturn-type"

extern "C"{}
namespace {

static PyObject* XFrecuencyLetter(PyObject* self, PyObject* args)
   char *filename = NULL;
   int idx = 0;

   if(!PyArg_ParseTuple(args, "is", &idx, &filename)) {
        return NULL;

   std::string letters = "QWERTYUIOPASDFGHJKLZXCVBNM";
   int count [26] = {};
   int maxFreq = 0;
   char maxChar;
   std::fstream newfile;,std::ios::in);

   if (newfile.is_open()){
   std::string tp;
    while (getline(newfile, tp))
      for(int i=1; i < 26;i++)
       for(int j=1; j <= tp.size(); j++)
          if ( == alpha[idx]  && ==
              count[i] += 1;
      for(int i=1; i < 26;i++)
        if (maxFreq < count[i])
          maxFreq = count[i];
          maxChar = letters[i];
    std::string answer = maxChar + std::to_string(maxFreq);
    const char *ars = answer.c_str();
    PyObject *result = PyUnicode_FromString(ars);
    return result;

static PyMethodDef myMethods[] = {
    { "XFrecuencyLetter",XFrecuencyLetter,METH_VARARGS, "Prints frequency letter" },
    { NULL, NULL, 0, NULL }

static struct PyModuleDef myModule = {
    "Test Module",

PyMODINIT_FUNC PyInit_myModule(void)
    return PyModule_Create(&myModule);
} //namespace

Now create the following script, which lets the question be analyzed for any letter, not only 'X':

(.env) [boris@fedora33server DJS24251220]$ cat
import myModule

# Letter whose followers are to be counted; normalised to upper case because
# the data file and the lookup table contain capital letters only.
Z = input("Input Letter you are concerned about : ").strip().upper()

# Must list the letters in the same order as the `letters` string in test.cpp:
# the extension receives only the position of the chosen letter.
alpha = ['Q','W','E','R','T','Y','U','I','O','P','A','S','D','F','G','H','J','K','L','Z','X','C','V','B','N','M']
index = alpha.index(Z)
file = input("Input filename : " )

# The extension parses its arguments as (int, str) and returns the answer
# as a Python str.
print(myModule.XFrecuencyLetter(index, file))

[boris@fedora33server DJS24251220]$ cat setup.py
from distutils.core import setup, Extension

# Build settings for the extension. They are passed to Extension() below;
# previously they were computed but never used. The interpreter's own CFLAGS
# are applied by distutils automatically, so they are not repeated here.
language = 'c++'
std = 'c++20'
# -Werror is left out so that warnings raised by -Wall/-Wextra are reported
# without turning into build failures.
extra_compile_args = [f"-std={std}", "-Wall", "-Wextra", "-DNDEBUG", "-O3"]

setup(
    name='myModule',
    version='1.0',
    ext_modules=[
        Extension(
            'myModule',
            ['test.cpp'],
            language=language,
            extra_compile_args=extra_compile_args,
        )
    ],
)

Build and install required shared library
(.env) [boris@fedora33server DJS24251220]$ python setup.py install
running install
running build
running build_ext
building 'myModule' extension
creating build
creating build/temp.linux-x86_64-3.9
gcc -pthread -Wno-unused-result -Wsign-compare -DDYNAMIC_ANNOTATIONS_ENABLED=1 -DNDEBUG -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -O2 -fexceptions -g -grecord-gcc-switches -pipe -Wall -Werror=format-security -Wp,-D_FORTIFY_SOURCE=2 -Wp,-D_GLIBCXX_ASSERTIONS -fstack-protector-strong -m64 -mtune=generic -fasynchronous-unwind-tables -fstack-clash-protection -fcf-protection -D_GNU_SOURCE -fPIC -fwrapv -fPIC -I/home/boris/DJS24251220/.env/include -I/usr/include/python3.9 -c test.cpp -o build/temp.linux-x86_64-3.9/test.o
creating build/lib.linux-x86_64-3.9
g++ -pthread -shared -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g -Wl,-z,relro -Wl,--as-needed -Wl,-z,now -g build/temp.linux-x86_64-3.9/test.o -L/usr/lib64 -o build/lib.linux-x86_64-3.9/
running install_lib
copying build/lib.linux-x86_64-3.9/ -> /home/boris/DJS24251220/.env/lib64/python3.9/site-packages
running install_egg_info
Removing /home/boris/DJS24251220/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info
Writing /home/boris/DJS24251220/.env/lib64/python3.9/site-packages/myModule-1.0-py3.9.egg-info

(.env) [boris@fedora33server DJS24251220]$ ll *.txt
-rw-rw-r--. 1 boris boris  1000000 Jul 21 21:33 24-B251220.txt
-rw-rw-r--. 1 boris boris 50000000 Jul 21 23:09 24-C251220.txt

Now test C++ performance via running

(.env) [boris@fedora33server DJS24251220]$ python
Input Letter you are concerned about : X
Input filename : 24-B251220.txt

(.env) [boris@fedora33server DJS24251220]$ python
Input Letter you are concerned about : X
Input filename : 24-C251220.txt

Then runtime for "C" file is around 1-2 sec.

No comments:

Post a Comment