K
K
Kairat Ubukulov2018-08-09 08:02:18
go
Kairat Ubukulov, 2018-08-09 08:02:18

How to parse yml file with Windows-1251 encoding?

There is a file in yml format on a remote FTP server, and I need to parse it correctly. The code below works when the file is UTF-8 encoded, but not for Windows-1251. What am I doing wrong?

// DownloadPowerFile retrieves the file named by url from the configured FTP
// server, strips the <offers> wrapper tags and unmarshals the remaining XML
// into a PowerCatalog.
//
// The returned pointer is never nil. On any failure (dial, login, change of
// directory, retrieve, read or XML parse) the error is printed and the catalog
// is returned as it stands at that point.
//
// NOTE(review): xml.Unmarshal only accepts UTF-8 input; for a Windows-1251
// payload use xml.NewDecoder with a CharsetReader instead.
func DownloadPowerFile(url string) *PowerCatalog {
	list := PowerCatalog{}

	ftpClient, err := ftp.Dial(globals.GlobalSettings.FtpAddress)
	if err != nil {
		fmt.Println(err)
		return &list
	}
	// Register Quit only after a successful Dial; otherwise ftpClient is not usable.
	defer ftpClient.Quit()

	if err = ftpClient.Login(globals.GlobalSettings.FtpUser, globals.GlobalSettings.FtpPassword); err != nil {
		fmt.Println(err)
		return &list
	}
	if err = ftpClient.ChangeDir("/U16-user/"); err != nil {
		fmt.Println(err)
		return &list
	}

	// Retr issues the FTP RETR command to fetch the given file from the remote server.
	resp, err := ftpClient.Retr(url)
	if err != nil {
		fmt.Println(err)
		return &list
	}
	// NOTE(review): assumes the Retr result is an io.ReadCloser — confirm
	// against the ftp package in use. Deferred calls run in reverse order, so
	// the data stream is closed before Quit is sent.
	defer resp.Close()

	body, err := ioutil.ReadAll(resp)
	if err != nil {
		fmt.Println(err)
		return &list
	}

	// Drop the <offers> wrapper so its children are decoded directly into list.
	newbody := strings.Replace(string(body), "<offers>", "", -1)
	newbody = strings.Replace(newbody, "</offers>", "", -1)
	if err = xml.Unmarshal([]byte(newbody), &list); err != nil {
		fmt.Println(err)
		return &list
	}
	fmt.Println("OKOK")
	return &list
}

Answer the question

In order to leave comments, you need to log in

2 answer(s)
K
Kairat Ubukulov, 2018-08-14
@ubukulov

The problem was solved in a different way; the method is shown below. We convert the file's encoding with the Linux `iconv` command before parsing it.

// DownloadPowerFile downloads the file at url, converts it from cp1251 to
// UTF-8 with the external iconv program, removes the windows-1251 encoding
// declaration with sed, and unmarshals the result into a PowerCatalog.
//
// The returned pointer is never nil. If any step fails, a message is printed
// and the catalog is returned as it stands at that point. A failed iconv or
// sed run stops processing, so a converted file left over from an earlier run
// is never parsed by mistake.
//
// NOTE(review): DownloadFilePower saves under the relative directory "files/",
// while the commands below use an absolute path; this only lines up when the
// process runs from /root/go-workpath/src/stocks — confirm.
func DownloadPowerFile(url string) *PowerCatalog {
	list := PowerCatalog{}

	// Download the file from the link and save it into the files folder.
	if err := DownloadFilePower("Powerplant_Talapai.yml", url); err != nil {
		fmt.Println(err)
		return &list
	}

	path := "/root/go-workpath/src/stocks/files/"

	// Re-encode the file from cp1251 to UTF-8.
	if err := runPowerCommand("iconv", "-f", "cp1251", "-t", "utf8", path+"Powerplant_Talapai.yml", "-o", path+"Powerplant_Talapai8.yml"); err != nil {
		return &list
	}

	// Remove the encoding="windows-1251" attribute from Powerplant_Talapai8.yml,
	// since the content is now UTF-8. Requires the sed program to be installed.
	if err := runPowerCommand("sed", "-i", "s/ encoding=\"windows-1251\"//", path+"Powerplant_Talapai8.yml"); err != nil {
		return &list
	}

	// Open the converted file.
	resp, err := os.Open(path + "Powerplant_Talapai8.yml")
	if err != nil {
		fmt.Println("Невозможно найти файл или открыть")
		return &list
	}
	defer resp.Close()

	body, err := ioutil.ReadAll(resp)
	if err != nil {
		return &list
	}

	// Drop the <offers> wrapper so its children are decoded directly into list.
	newbody := strings.Replace(string(body), "<offers>", "", -1)
	newbody = strings.Replace(newbody, "</offers>", "", -1)
	if err = xml.Unmarshal([]byte(newbody), &list); err != nil {
		fmt.Println(err)
	}
	return &list
}

// runPowerCommand runs the named program with args, prints its combined
// stdout/stderr (and the failure, if any), and returns the run error.
func runPowerCommand(name string, args ...string) error {
	out, err := exec.Command(name, args...).CombinedOutput()
	if err != nil {
		fmt.Printf("cmd.Run() failed with %s\n", err)
	}
	fmt.Printf("combined out:\n%s\n", string(out))
	return err
}

// DownloadFilePower fetches url over HTTP and stores the response body in
// "files/"+filepath, relative to the current working directory.
//
// It returns an error if the request fails, if the server answers with a
// status other than 200 OK, or if the file cannot be created, written or
// closed. The destination file is only created after a successful response,
// so a failed download does not truncate a previously downloaded copy.
func DownloadFilePower(filepath string, url string) error {
	// Get the data first; nothing on disk is touched until the response is known to be good.
	resp, err := http.Get(url)
	if err != nil {
		return err
	}
	defer resp.Body.Close()

	// Without this check an error page (404, 500, ...) would be saved as the file.
	if resp.StatusCode != http.StatusOK {
		return fmt.Errorf("downloading %q: unexpected status %s", url, resp.Status)
	}

	// Create the file.
	out, err := os.Create("files/" + filepath)
	if err != nil {
		return err
	}

	// Write the body to the file.
	if _, err := io.Copy(out, resp.Body); err != nil {
		out.Close()
		return err
	}

	// Close reports write errors that were deferred by the OS, so return it.
	return out.Close()
}

P
Papa, 2018-08-09
Stifflera @PapaStifflera

There is a Decoder.CharsetReader field for this:
// CharsetReader, if non-nil, defines a function to generate
// charset-conversion readers, converting from the provided
// non-UTF-8 charset into UTF-8. If CharsetReader is nil or
// returns an error, parsing stops with an error. One of the
// CharsetReader's result values must be non-nil.
CharsetReader func(charset string, input io.Reader) (io.Reader, error)
You can use it like this:

// Decode the XML through a decoder that transcodes Windows-1251 input to UTF-8.
d := xml.NewDecoder(xmlFile)
d.CharsetReader = func(charset string, input io.Reader) (io.Reader, error) {
	// XML encoding names are case-insensitive, and the decoder passes the name
	// exactly as written in the document, so normalise it before matching
	// (e.g. "Windows-1251" must be accepted too).
	switch strings.ToLower(charset) {
	case "windows-1251":
		return charmap.Windows1251.NewDecoder().Reader(input), nil
	default:
		return nil, fmt.Errorf("unknown charset: %s", charset)
	}
}
err := d.Decode(&zlList)

https://ru.stackoverflow.com/questions/713777/gola...

Didn't find what you were looking for?

Ask your question

Ask a Question

731 491 924 answers to any question