sbinet / go-python

naive go bindings to the CPython2 C-API
Other
1.52k stars 138 forks source link

Pandas and go Python are incompatible? #115

Closed dil1gent closed 2 years ago

dil1gent commented 2 years ago

I imported the pandas package in my Python function and used its read_csv function. When I run the function directly with Python, it executes normally and returns 25. When I call the same function through Go, it returns -1. Why?

sbinet commented 2 years ago

what exactly did you try? could you post a simple reproducer of this issue? (also, do note that sbinet/go-python only support CPython-2, not CPython-3)

dil1gent commented 2 years ago

what exactly did you try? could you post a simple reproducer of this issue? (also, do note that sbinet/go-python only support CPython-2, not CPython-3)


go1.16.4 python 2.7.12

The CPython version is CPython 2, and calling Python 2 functions from Go works fine when they return an integer directly. Here is the code, please take a look:

  1. go
    
    package main

import ( "github.com/sbinet/go-python" "fmt" )

// pyLogModule and pyTest hold the imported Python module and the function
// looked up on it; both are assigned in pyInit.
var pyLogModule, pyTest *python.PyObject

func init() { err := python.Initialize() if err != nil { panic(err.Error()) } } func main(){ pyInit("creditcard","creditcard1",0) }

// pyInit prepends the Python code directory to sys.path, imports the
// logistic_model2 module, looks up its test2 function, calls it with
// datasetName and prints the integer it returns.
//
// dataFile and epsilon are accepted but not used.
//
// Every go-python call that can yield nil is checked. On failure the pending
// Python exception is printed and pyInit returns, instead of passing a nil
// object on to the next call.
func pyInit(datasetName string, dataFile string, epsilon float64) {
    // report prints msg followed by the Python exception fetched with
    // python.PyErr_Fetch, so the real cause of a failure is visible.
    report := func(msg string) {
        fmt.Println(msg)
        exc, val, tb := python.PyErr_Fetch()
        fmt.Printf("exc=%v\nval=%v\ntb=%v\n", exc, val, tb)
    }

    m := python.PyImport_ImportModule("sys")
    if m == nil {
        report("Error importing sys module")
        return
    }
    sysPath := m.GetAttrString("path")
    if sysPath == nil {
        report("Error reading sys.path")
        return
    }

    codeDir := python.PyString_FromString("/home/zhaoxinbo/Biscotti/ML/code")
    if err := python.PyList_Insert(sysPath, 0, codeDir); err != nil {
        fmt.Println("Error updating sys.path:", err)
        return
    }
    fmt.Println(sysPath)

    pyLogModule = python.PyImport_ImportModule("logistic_model2")
    if pyLogModule == nil {
        report("Error importing module")
        return
    }

    pyTest = pyLogModule.GetAttrString("test2")
    if pyTest == nil {
        report("Error importing function")
        return
    }

    pyNumFeatures := pyTest.CallFunction(python.PyString_FromString(datasetName))
    if pyNumFeatures == nil {
        // NOTE(review): without this check a failed call is handed to
        // PyInt_AsLong as nil; presumably that is where the bare -1 comes from.
        report("Error calling function")
        return
    }

    numFeatures := python.PyInt_AsLong(pyNumFeatures)
    fmt.Println(numFeatures)
}

2. python
```python
import pandas
def test2(dataset_name ):
    # Returns the column count (shape[1]) of ../data/creditcard.csv as an int.
    # dataset_name is accepted but not used.
    # NOTE(review): `pd` is not bound by the plain `import pandas` shown above;
    # as written this raises NameError unless the import is `import pandas as pd`.
    # NOTE(review): the relative path is resolved against the current working
    # directory of the running process, not against this file's location.

    #return 256
    return int(pd.read_csv("../data/creditcard.csv").shape[1])
```

sbinet commented 2 years ago

is that this exact python code? because you import pandas as pandas and try to use it as pd. (-1 sounds like a typical CPython-2 error value that one might get on the C side when e.g. an import fails)

dil1gent commented 2 years ago

is that this exact python code? because you import pandas as pandas and try to use it as pd. (-1 sounds like a typical CPython-2 error value that one might get on the C side when e.g. an import fails)

—————————————————————————————————————— Sorry, the actual Python source does use `import pandas as pd`; the `as pd` part was accidentally deleted when I copied the code here. I wonder whether the problem is caused by an incompatibility between language versions or by an incompatibility with the pandas package.

dil1gent commented 2 years ago

is that this exact python code? because you import pandas as pandas and try to use it as pd. (-1 sounds like a typical CPython-2 error value that one might get on the C side when e.g. an import fails)

Could you perhaps run the code above to check whether you get the same error? If go-python works as expected, it will save a lot of effort on my current project. Sorry to bother you.

sbinet commented 2 years ago

hi,

with the following code:

package main

import (
    "fmt"
    "os"
    "path/filepath"

    "github.com/sbinet/go-python"
)

// pyLogModule and pyTest hold the imported Python module and the function
// looked up on it; both are assigned in run.
var pyLogModule, pyTest *python.PyObject

// init calls python.Initialize before main runs and panics with the error
// message if initialization fails.
func init() {
    if err := python.Initialize(); err != nil {
        panic(err.Error())
    }
}

// main delegates all the work to run.
func main() {
    run()
}

func run() {
    dir, err := os.MkdirTemp("", "go-python-")
    if err != nil {
        panic(fmt.Errorf("could not create temp dir: %+v", err))
    }
    defer os.RemoveAll(dir)

    fname := filepath.Join(dir, "data.csv")
    err = os.WriteFile(fname, []byte(data), 0644)
    if err != nil {
        panic(fmt.Errorf("could not create data file: %+v", err))
    }

    m := python.PyImport_ImportModule("sys")
    sysPath := m.GetAttrString("path")

    python.PyList_Insert(sysPath, 0, python.PyString_FromString(dir))
    fmt.Println(sysPath)

    err = os.WriteFile(filepath.Join(dir, "m.py"), []byte(module), 0644)
    if err != nil {
        panic(fmt.Errorf("could not create pandas module: %+v", err))
    }

    pyLogModule = python.PyImport_ImportModule("m")
    if pyLogModule == nil {
        panic(fmt.Errorf("could not import module"))
    }

    pyTest = pyLogModule.GetAttrString("test")
    if pyTest == nil {
        fmt.Println("Error importing function")
        panic(fmt.Errorf("could not import function"))
    }

    pyNumFeatures := pyTest.CallFunction(python.PyString_FromString(fname))
    if pyNumFeatures == nil {
        exc, val, tb := python.PyErr_Fetch()
        fmt.Printf("exc=%v\nval=%v\ntb=%v\n", exc, val, tb)
        panic(fmt.Errorf("could not call function"))
    }

    numFeatures := python.PyInt_AsLong(pyNumFeatures)
    fmt.Println(numFeatures)
}

// module is the source of the Python module that run writes to m.py.
// It defines test(fname), which prints fname and the location of the imported
// pandas package, then returns shape[1] of pandas.read_csv(fname) as an int.
// It is kept as a raw string because the embedded Python is
// indentation-sensitive.
const module = `
import pandas as pd

def test(fname):
    print("fname: %s, pandas: %s" % (fname, pd.__file__))
    return int(pd.read_csv(fname).shape[1])
`

// data is the CSV fixture that run writes to data.csv: two newline-terminated
// rows of five comma-separated fields each.
const data = "1,2,3,4,hello\n" +
    "5,6,7,8,world\n"

in an ubuntu container (docker run --rm -it ubuntu:18.04), I get:

$> go run ./main.go 
['/tmp/go-python-3822668890', '/usr/lib/python2.7', '/usr/lib/python2.7/plat-x86_64-linux-gnu', '/usr/lib/python2.7/lib-tk', '/usr/lib/python2.7/lib-old', '/usr/lib/python2.7/lib-dynload', '/usr/local/lib/python2.7/dist-packages', '/usr/lib/python2.7/dist-packages']
fname: /tmp/go-python-3822668890/data.csv, pandas: /usr/lib/python2.7/dist-packages/pandas/__init__.pyc
5
dil1gent commented 2 years ago

hi,

with the following code:

package main

import (
  "fmt"
  "os"
  "path/filepath"

  "github.com/sbinet/go-python"
)

// pyLogModule and pyTest hold the imported Python module and the function
// looked up on it; both are assigned in run.
var pyLogModule, pyTest *python.PyObject

// init calls python.Initialize before main runs and panics with the error
// message if initialization fails.
func init() {
    if err := python.Initialize(); err != nil {
        panic(err.Error())
    }
}

// main delegates all the work to run.
func main() {
    run()
}

func run() {
  dir, err := os.MkdirTemp("", "go-python-")
  if err != nil {
      panic(fmt.Errorf("could not create temp dir: %+v", err))
  }
  defer os.RemoveAll(dir)

  fname := filepath.Join(dir, "data.csv")
  err = os.WriteFile(fname, []byte(data), 0644)
  if err != nil {
      panic(fmt.Errorf("could not create data file: %+v", err))
  }

  m := python.PyImport_ImportModule("sys")
  sysPath := m.GetAttrString("path")

  python.PyList_Insert(sysPath, 0, python.PyString_FromString(dir))
  fmt.Println(sysPath)

  err = os.WriteFile(filepath.Join(dir, "m.py"), []byte(module), 0644)
  if err != nil {
      panic(fmt.Errorf("could not create pandas module: %+v", err))
  }

  pyLogModule = python.PyImport_ImportModule("m")
  if pyLogModule == nil {
      panic(fmt.Errorf("could not import module"))
  }

  pyTest = pyLogModule.GetAttrString("test")
  if pyTest == nil {
      fmt.Println("Error importing function")
      panic(fmt.Errorf("could not import function"))
  }

  pyNumFeatures := pyTest.CallFunction(python.PyString_FromString(fname))
  if pyNumFeatures == nil {
      exc, val, tb := python.PyErr_Fetch()
      fmt.Printf("exc=%v\nval=%v\ntb=%v\n", exc, val, tb)
      panic(fmt.Errorf("could not call function"))
  }

  numFeatures := python.PyInt_AsLong(pyNumFeatures)
  fmt.Println(numFeatures)
}

// module is the source of the Python module that run writes to m.py.
// It defines test(fname), which prints fname and the location of the imported
// pandas package, then returns shape[1] of pandas.read_csv(fname) as an int.
// It is kept as a raw string because the embedded Python is
// indentation-sensitive.
const module = `
import pandas as pd

def test(fname):
  print("fname: %s, pandas: %s" % (fname, pd.__file__))
  return int(pd.read_csv(fname).shape[1])
`

// data is the CSV fixture that run writes to data.csv: two newline-terminated
// rows of five comma-separated fields each.
const data = "1,2,3,4,hello\n" +
    "5,6,7,8,world\n"

in an ubuntu container (docker run --rm -it ubuntu:18.04), I get:

$> go run ./main.go 
['/tmp/go-python-3822668890', '/usr/lib/python2.7', '/usr/lib/python2.7/plat-x86_64-linux-gnu', '/usr/lib/python2.7/lib-tk', '/usr/lib/python2.7/lib-old', '/usr/lib/python2.7/lib-dynload', '/usr/local/lib/python2.7/dist-packages', '/usr/lib/python2.7/dist-packages']
fname: /tmp/go-python-3822668890/data.csv, pandas: /usr/lib/python2.7/dist-packages/pandas/__init__.pyc
5

Thanks a lot. I got the same result on my server. I will try to integrate it into the project tomorrow.

dil1gent commented 2 years ago

My biggest mistake was the path used to open the file in Python: I used a relative path. When the Python code is called from Go, a relative path is resolved against the directory the Go program (the go file or the Go binary) is run from, i.e. the process's working directory, not against the location of the Python file. This is why the Python file works when executed on its own, while the call from Go fails.

sbinet commented 2 years ago

let me interpret this as a closed issue.

feel free to reopen otherwise.